--- /usr/bin/ndb_size.pl 2009-11-17 01:24:22.000000000 +0000 +++ ndb_size.pl 2010-02-11 11:26:22.000000000 +0000 @@ -34,6 +34,8 @@ # We currently estimate sizes for: 4.1, 5.0 and 5.1 to various amounts # of accurracy. # +# We also do a few rudimentary checks to see if tables are creatable on NDB cluster +# # There is no warranty. # # BUGS @@ -42,7 +44,15 @@ # - some float stores come out weird (when there's a comma e.g. 'float(4,1)') # - no disk data values # - computes the storage requirements of views (and probably MERGE) -# - ignores character sets? +# - ignores part keys fail on NDB Cluster e.g. CREATE TABLE $table (a int, b varchar(20), c int, primary key (a,b(10)), key (c)); + +use constant { CHARLEN_DEFAULT => 1, + CHARLEN_UTF8 => 3, # could become 4 in future MySQL/NDB releases + CHARLEN_UTF2 => 2, + CHARLEN_UTF16 => 2, # could become 4 in future MySQL/NDB releases + }; + + package MySQL::NDB::Size::Parameter; @@ -80,6 +90,7 @@ type is_varsize size + charlen Key) ], hash => 'dm_overhead', scalar => [{ -default => 4 },'align'], @@ -417,10 +428,11 @@ $debug, $format, $excludetables, - $excludedbs); + $excludedbs, + $charset); GetOptions('database|d=s'=>\$database, - 'hostname=s'=>\$hostname, + 'hostname|h=s'=>\$hostname, 'socket=s'=>\$socket, 'user|u=s'=>\$user, 'password|p=s'=>\$password, @@ -428,7 +440,8 @@ 'loadqueries|l=s'=>\$loadqueries, 'excludetables=s'=>\$excludetables, 'excludedbs=s'=>\$excludedbs, - 'help|usage|h!'=>\$help, + 'default-character-set=s'=>\$charset, + 'help|usage|?!'=>\$help, 'debug'=>\$debug, 'format|f=s'=>\$format, ); @@ -449,6 +462,7 @@ print STDERR "\t--user and --password default to empty string\n"; print STDERR "\t--format=(html|text) Output format\n"; print STDERR "\t--excludetables Comma separated list of table names to skip\n"; + print STDERR "\t--default-character-set=(utf8|utf2|utf16) Assume all CHAR/VARCHAR columns in tables will be storing this character set\n"; print STDERR "\t--excludedbs Comma separated list of database names to skip\n"; print STDERR "\t--savequeries= saves all queries to the DB into \n"; print STDERR "\t--loadqueries= loads query results from . Doesn't connect to DB.\n"; @@ -493,6 +507,8 @@ $excludetables = join (',', map { $dbh->quote($_) } split ',', $excludetables ) if $excludetables; +$charset='default' if (!defined($charset)); + if(!$loadqueries) { if (scalar(keys %withdb)>1) @@ -518,6 +534,7 @@ $report->versions('4.1','5.0','5.1'); my $tables; +my $infoschema=1; if($loadqueries) { @@ -525,7 +542,7 @@ } else { - my $sql= "select t.TABLE_NAME,t.TABLE_SCHEMA " . + my $sql= "select t.TABLE_NAME,t.TABLE_SCHEMA,t.TABLE_COLLATION " . " from information_schema.TABLES t " . " where t.TABLE_SCHEMA in ( $dblist ) "; @@ -537,6 +554,7 @@ if (!$tables) { print "WARNING: problem selecing from INFORMATION SCHEMA ($sql)\n"; + $infoschema=0; if ($#dbs>0) { print "\t attempting to fallback to show tables from $database"; $tables= $dbh->selectall_arrayref("show tables from $database\n"); @@ -560,7 +578,7 @@ foreach my $colname (keys %$info) { my $col= new MySQL::NDB::Size::Column(name => $colname); - my ($type, $size); + my ($type, $size, $charlen); $col->Key($$info{$colname}{Key}) if(defined($$info{$colname}{Key}) &&$$info{$colname}{Key} ne ''); @@ -584,8 +602,24 @@ { $type= $$info{$colname}{Type}; } + + # Work out byte length + $charlen=CHARLEN_DEFAULT; # default + if ($charset =~/utf8/i) { + $charlen=CHARLEN_UTF8; # Force UTF8 + } elsif ($charset =~/utf2/i) { + $charlen=CHARLEN_UTF2; # Force UTF2 + } elsif ($charset =~/utf16/i) { + $charlen=CHARLEN_UTF16; # Force UTF16 + } elsif (defined($$info{$colname}{Character_Set_Name})) { + $charlen=CHARLEN_UTF8 if ($$info{$colname}{Character_Set_Name} =~/utf8/i); + $charlen=CHARLEN_UTF16 if ($$info{$colname}{Character_Set_Name} =~/utf16/i); + $charlen=CHARLEN_UTF2 if ($$info{$colname}{Character_Set_Name} =~/utf2/i); + } + $col->type($type); $col->size($size); + $col->charlen($charlen); if($type =~ /tinyint/) {$col->dm(1)} @@ -629,8 +663,13 @@ } elsif($type =~ /varchar/ || $type =~ /varbinary/) { - my $fixed=$size+ceil($size/256); - $col->dm_overhead_set('length' => ceil($size/256)); + my $colsize=$size; + if ($type =~ /varchar/) { + $colsize=$size*$charlen; + } + + my $fixed=$colsize+ceil($colsize/256); + $col->dm_overhead_set('length' => ceil($colsize/256)); $col->dm($fixed); if(!$col->Key()) # currently keys must be non varsized { @@ -649,13 +688,19 @@ $queries{$sql}= \@dynamic; } $dynamic[0]=0 if ! defined($dynamic[0]) || !@dynamic; - $dynamic[0]+=ceil($size/256); # size bit + $dynamic[0]+=ceil($colsize/256); # size bit $col->is_varsize(1); $col->ver_dm('5.1',ceil($dynamic[0])); } } - elsif($type =~ /binary/ || $type =~ /char/) - {$col->dm($size)} + elsif($type =~ /binary/) + { + $col->dm($size); + } + elsif($type =~ /char/) + { + $col->dm($size*$charlen); + } elsif($type =~ /text/ || $type =~ /blob/) { $col->dm_overhead_set('length' => 8); @@ -875,6 +920,20 @@ else { $info= $dbh->selectall_hashref($sql,"Field"); + if ($infoschema) { + $sql ="select COLUMN_NAME,CHARACTER_SET_NAME + from information_schema.columns + where table_schema ='$schema' + and table_name='$table' + and CHARACTER_SET_NAME is not null"; + + my $char_info= $dbh->selectall_hashref($sql,"COLUMN_NAME"); + + foreach my $key (keys %$char_info) { + $info->{$key}{'Character_Set_Name'} = $char_info->{$key}{'CHARACTER_SET_NAME'}; + } + + } $queries{$sql}= $info; } } @@ -1173,6 +1232,13 @@ printf $f.'%20s %9s %5s','Varsize Columns DM/Row','','',''; printf $v, $vdm_totals{$_} || 0 foreach @{$r->versions}; print "\n"; + + # Warn of Table Creation Issues + # Max value for row length is 8052 + # + if ($dm_totals{'5.0'}>8052) { + print "ERROR: $tname tables maximum row length $dm_totals{'5.0'} too large to create in NDB Cluster Engine and will need refactoring\n"; + } # DM for Indexes @@ -1554,6 +1620,13 @@ print "\n"; + # Warn of Table Creation Issues + # Max value for row length is 8052 + # + if ($dm_totals{'5.0'}>8052) { + print $self->h4("ERROR: $tname tables maximum row length $dm_totals{'5.0'} too large to create in NDB Cluster Engine and will need refactoring"); + } + # DM for Indexes print $self->h3('DataMemory for Indexes'); print "\n";