#!/usr/bin/perl

use DBI;
use Getopt::Long;
use Data::Dumper;
use File::Spec;

$opt_ot_rows=100000;
$opt_ot_2_it=5;
$opt_database="test";
$opt_host="localhost";
$opt_socket="";
$opt_port="";
$opt_datadir="";
$opt_user='root';
$opt_password='';

%data_filename=( ot =>"outer.txt", it=>"inner.txt" );

$opt_generate=$opt_load_data="";

GetOptions("ot-rows=s","ot-2-it=s",
           "database=s", "host=s","socket=s","port=s",
           "user=s","password=s", "datadir=s",
           "generate","load-data") || usage();

usage() unless ($opt_generate || $opt_load_data);

$outer_rows=$opt_ot_rows;
$inner_rows=$outer_rows*$opt_ot_2_it;

if ($opt_datadir && ! -d "$opt_datadir")
{
  die <<EOF;

Directory $opt_datadir doesn't exist. Use --datadir option to specify 
proper datadir.

EOF
}

$opt_datadir=File::Spec->rel2abs($opt_datadir);


if ($opt_generate)
{
  print<<EOF;
  Generating following data:
  outer table
        data file:       $opt_datadir/$data_filename{ot}
        number of rows:  $outer_rows 
  inner table 
        data file:       $opt_datadir/$data_filename{it}
        number of rows:  $inner_rows

EOF

  generate();
}

if ($opt_load_data)
{
  print<<EOF;

  Loading following data files:
  outer table
        data file:       $opt_datadir/$data_filename{ot}
  inner table 
        data file:       $opt_datadir/$data_filename{it}

  to database: $opt_database

EOF

  load_data();
}


sub generate()
{
   generate_outer_table($opt_datadir."/".$data_filename{ot});
   generate_inner_table($opt_datadir."/".$data_filename{it});
}

sub generate_outer_table()
{
  my ($filename)=@_;

  open(OUTER,">$filename") || die "Can't open file $filename";

  $outer_id=new my_ordinal($outer_rows,    1, $outer_rows);
  $outer_key=new my_uniform($outer_rows,   0, 1000000000, $outer_rows);
  $outer_multi=new my_uniform($outer_rows, 0, 1000, 1000);

  for ($i=0;$i<$outer_rows;$i++)
  {
    $outer_expr_key=$outer_expr_nokey=$outer_key->get_next_value();

    print OUTER join(",",$outer_id->get_next_value(),
                         $outer_expr_key,
                         $outer_expr_nokey,
                         $outer_multi->get_next_value()),"\n";
  }
  close(OUTER);
}

sub generate_inner_table()
{
  my ($filename)=@_;

  open(INNER,">$filename") || die "Can't open file $filename" ;

  $inner_id=new my_ordinal($inner_rows,    1, $inner_rows);
  $inner_key=new my_uniform($inner_rows,   0, 1000000000, $inner_rows);
 
  for ($i=0;$i<$inner_rows;$i++)
  {
    $inner_expr_key=$inner_expr_nokey=$inner_key->get_next_value();

    print INNER join(",",$inner_id->get_next_value(),
                         $inner_expr_key,
                         $inner_expr_nokey),"\n";
  }
  close(INNER);
}


sub load_data()
{
  %create_table_stmt=( ot => "create table ot (id int not null,
                                           expr_key int not null,
                                           expr_nokey int not null,
                                           expr_multi int not null,
                                           primary key (id),
                                           key (expr_key))",

                       it => "create table it (id int not null,
                                           expr_key int not null,
                                           expr_nokey int not null,
                                           primary key (id),
                                           key (expr_key))");
 
  $drop_table_stmt="drop table if exists %s";
  $load_data_stmt="load data infile '%s' into table %s FIELDS TERMINATED BY ','";

  $dsn="dbi:mysql:$opt_database;host=$opt_host";
  $dsn.= ";mysql_socket=$opt_socket" if ($opt_socket);

  $dbh=DBI->connect($dsn, $opt_user, $opt_password) || die "Can't connect to MySQL server";
  
  foreach $table (ot,it)
  {
    do_query($dbh,sprintf($drop_table_stmt,$table));
    do_query($dbh,$create_table_stmt{$table});          

    $filename=$opt_datadir."/".$data_filename{$table};
    if ( -f $filename )
    {
      do_query($dbh,sprintf($load_data_stmt,$filename,$table));
    }
    else
    {
      print "ERROR: Can't read file: $filename to load data for table: $table\n";
    }
  }
}

sub do_query
{
  my($dbh, $query)=@_;
  print "$query\n" if ($opt_debug);
  $dbh->do($query) or
    die "\nError executing '$query':\n$DBI::errstr\n";
}
          
sub usage()
{
print <<EOF;

Data generator and loader of generated data for subquery benchmark

--datadir=<directory>
  Directory where script will store result files or look for dataset files
  to load

--generate
  Generate dataset that contsits of two tables 'outer' and 'inner' and store
  generated data in plan text files with the same names

--ot-rows=<number of rows>
  Number of rows in outer(ot) table

--ot-2-in=<N>
  Relation between outer and inner tables(ot:in as 1:N)
  
--load-data
  Load generated dataset with --generate option
  
--database=<database name>
--host=<host name>
--user=<user name>
--password=<password>
--socket=<socket>

EOF
} 

package my_ordinal;

sub new
{
  my ($class,$nrows,$min,$max)=@_;
  my $self={};
  bless $self,$class;

  $self->{'nrows'}= $nrows;
  $self->{'min'}= $min;
  $self->{'max'}= $max;

  $self->{'value'}= 0;
  $self->{'resvalue'}= '';
  $self->{'range'}= $max-$min;
  $self->{'step'}= 1;

  #range sparse?
  if ($self->{'nrows'} < $self->{'range'})
  {
    $self->{'step'}=$self->{'range'}/($self->{'nrows'}-1);
  }

  return $self;
}

sub get_next_value
{
  my ($self)=@_;
  
  $self->{'resvalue'}= sprintf("%.0f", $self->{'min'} + $self->{'value'});
  $self->{'value'}+= $self->{'step'};
  if ($self->{'value'} > $self->{'range'})
  {
    $self->{'value'}=0;
  }
  return $self->{'resvalue'};
}

package my_uniform;

sub new
{
  my ($class,$nrows,$min,$max,$uniques)=@_;
  my $self={};
  bless $self,$class;

  @root=(2, 7, 26, 59, 242, 568, 1792, 5649, 16807, 30001, 
         60010, 180001, 360002, 1000001, 2000000);

  @prime=(11, 101, 1009, 10007, 100003, 1000003, 10000019,
          100000007, 2147483647, 10000000019, 100000000003,
          1000000000039, 10000000000037, 100000000000031,
          1000000000000037);

  $self->{'nrows'}= $nrows;
  $self->{'min'}= $min;
  $self->{'max'}= $max;
  $self->{'uniques'}= $uniques;

  $self->{'range'}= $max-$min;

  $self->{'index'}=(log($self->{'range'})/log(10));

  $self->{'G'}= $root[$self->{'index'}];
  $self->{'P'}= $prime[$self->{'index'}];
  $self->{'seed'}= $self->{'G'};
  $self->{'uniques_count'}=0;

  return $self;
}

sub get_next_value()
{
  my ($self)=@_;

  if ($self->{'uniques_count'} >= $self->{'uniques'})
  {
    $self->{'G'}= $root[$self->{'index'}];
    $self->{'P'}= $prime[$self->{'index'}];
    $self->{'seed'}= $self->{'G'};
    $self->{'uniques_count'}=0;
  }
  $self->{'resvalue'}=$self->{'min'}+$self->get_uniform();
  $self->{'uniques_count'}++;

  return $self->{'resvalue'};
}

sub get_uniform()
{
  my ($self)=@_;

  $self->{'seed'} = ($self->{'G'} * $self->{'seed'}) % $self->{'P'};
  while ( $self->{'seed'} > $self->{'range'})
  {
    $self->{'seed'}= ($self->{'G'}* $self->{'seed'}) % $self->{'P'};
  }
  return ($self->{'seed'} - 1);
}
