=== modified file 'storage/ndb/tools/ndb_size.pl' (properties changed: -x to +x) --- storage/ndb/tools/ndb_size.pl 2009-05-26 18:53:34 +0000 +++ storage/ndb/tools/ndb_size.pl 2010-03-26 17:57:46 +0000 @@ -34,6 +34,8 @@ # We currently estimate sizes for: 4.1, 5.0 and 5.1 to various amounts # of accurracy. # +# We also do a few rudimentary checks to see if tables are creatable on NDB cluster +# # There is no warranty. # # BUGS @@ -42,7 +44,13 @@ # - some float stores come out weird (when there's a comma e.g. 'float(4,1)') # - no disk data values # - computes the storage requirements of views (and probably MERGE) -# - ignores character sets? +# - ignores problems with prefix key support on Ndb Cluster +# e.g. CREATE TABLE t1 (a int, b varchar(20), c int, primary key (a,b(10)), key (c)); +# - Treats var sized keys as fixed size +# - Underestimates v1 internal Blob table primary key size (it's an array of int, not an int) +# - Does not take v2 internal Blob table into account (Varsize parts) +# - Does not show max char len used in charset size calculations. +# package MySQL::NDB::Size::Parameter; @@ -80,6 +88,7 @@ type is_varsize size + charlen Key) ], hash => 'dm_overhead', scalar => [{ -default => 4 },'align'], @@ -417,7 +426,8 @@ $debug, $format, $excludetables, - $excludedbs); + $excludedbs, + $defaultcharset); GetOptions('database|d=s'=>\$database, 'hostname=s'=>\$hostname, @@ -428,6 +438,7 @@ 'loadqueries|l=s'=>\$loadqueries, 'excludetables=s'=>\$excludetables, 'excludedbs=s'=>\$excludedbs, + 'default-character-set=s'=>\$defaultcharset, 'help|usage|h!'=>\$help, 'debug'=>\$debug, 'format|f=s'=>\$format, @@ -452,6 +463,7 @@ print STDERR "\t--excludedbs Comma separated list of database names to skip\n"; print STDERR "\t--savequeries= saves all queries to the DB into \n"; print STDERR "\t--loadqueries= loads query results from . Doesn't connect to DB.\n"; + print STDERR "\t--default-character-set= Assume all CHAR/VARCHAR/TEXT columns in tables will be storing this character set\n"; exit(1); } @@ -493,6 +505,8 @@ $excludetables = join (',', map { $dbh->quote($_) } split ',', $excludetables ) if $excludetables; +$defaultcharset='notset' if (!defined($defaultcharset)); + if(!$loadqueries) { if (scalar(keys %withdb)>1) @@ -517,7 +531,8 @@ $report->versions('4.1','5.0','5.1'); -my $tables; +my ($tables,$charset_len); +my $using_infoschema=1; if($loadqueries) { @@ -525,7 +540,7 @@ } else { - my $sql= "select t.TABLE_NAME,t.TABLE_SCHEMA " . + my $sql= "select t.TABLE_NAME,t.TABLE_SCHEMA, t.TABLE_COLLATION " . " from information_schema.TABLES t " . " where t.TABLE_SCHEMA in ( $dblist ) "; @@ -537,12 +552,27 @@ if (!$tables) { print "WARNING: problem selecing from INFORMATION SCHEMA ($sql)\n"; + $using_infoschema=0; if ($#dbs>0) { print "\t attempting to fallback to show tables from $database"; $tables= $dbh->selectall_arrayref("show tables from $database\n"); } else { print "All Databases not supported in 4.1. Please specify --database=\n"; } + print "Cannot access charset max length info, using defaults\n"; + $charset_len={'default' => {MAXLEN=>1}, + 'utf8' => {MAXLEN=>3}, + 'ucs2' => {MAXLEN=>2}, + 'utf16' => {MAXLEN=>2}, + }; + if ($defaultcharset =~ /notset/) + { + print "No per-column character set info, assuming 1 byte/char\n"; + } + } else { + $sql= "select CHARACTER_SET_NAME,MAXLEN from INFORMATION_SCHEMA.CHARACTER_SETS"; + $charset_len= $dbh->selectall_hashref($sql,"CHARACTER_SET_NAME"); + $charset_len->{'default'} = {MAXLEN=>1}; } $queries{"show tables"}= $tables; } @@ -560,7 +590,7 @@ foreach my $colname (keys %$info) { my $col= new MySQL::NDB::Size::Column(name => $colname); - my ($type, $size); + my ($type, $size, $charlen, $bytesize); $col->Key($$info{$colname}{Key}) if(defined($$info{$colname}{Key}) &&$$info{$colname}{Key} ne ''); @@ -583,9 +613,28 @@ else { $type= $$info{$colname}{Type}; + $size= 0; } + + # Work out byte length for charset types + $charlen=$charset_len->{'default'}{MAXLEN}; # default + if (defined ($charset_len->{$defaultcharset})) { + $charlen=$charset_len->{$defaultcharset}{MAXLEN}; # Force default-character-set + } elsif (defined($$info{$colname}{CHARACTER_SET_NAME})) { + $charlen=$charset_len->{$$info{$colname}{CHARACTER_SET_NAME}}{MAXLEN}; + } + + $bytesize = $size; + if ($type =~ /char/ || + $type =~ /varchar/ || + $type =~ /text/) + { + $bytesize = $size * $charlen; + } + $col->type($type); $col->size($size); + $col->charlen($charlen); if($type =~ /tinyint/) {$col->dm(1)} @@ -629,11 +678,12 @@ } elsif($type =~ /varchar/ || $type =~ /varbinary/) { - my $fixed=$size+ceil($size/256); - $col->dm_overhead_set('length' => ceil($size/256)); + my $lenbytes=ceil($bytesize/256); + my $fixed=$bytesize+$lenbytes; + $col->dm_overhead_set('length' => $lenbytes); $col->dm($fixed); - if(!$col->Key()) # currently keys must be non varsized - { + if(!$col->Key()) # Bug - var keys are not fixed. + { my $sql= sprintf("select avg(length(`%s`)) " . " from `%s`.`%s` " , $colname, $t->schema(), $t->table_name()); @@ -649,13 +699,13 @@ $queries{$sql}= \@dynamic; } $dynamic[0]=0 if ! defined($dynamic[0]) || !@dynamic; - $dynamic[0]+=ceil($size/256); # size bit + $dynamic[0]+=$lenbytes; # size bit $col->is_varsize(1); $col->ver_dm('5.1',ceil($dynamic[0])); } } elsif($type =~ /binary/ || $type =~ /char/) - {$col->dm($size)} + {$col->dm($bytesize)} elsif($type =~ /text/ || $type =~ /blob/) { $col->dm_overhead_set('length' => 8); @@ -875,6 +925,19 @@ else { $info= $dbh->selectall_hashref($sql,"Field"); + if ($using_infoschema) { + $sql ="select COLUMN_NAME,CHARACTER_SET_NAME + from information_schema.columns + where table_schema ='$schema' + and table_name='$table' + and CHARACTER_SET_NAME is not null"; + + my $char_info= $dbh->selectall_hashref($sql,"COLUMN_NAME"); + + foreach my $key (keys %$char_info) { + $info->{$key}{'CHARACTER_SET_NAME'} = $char_info->{$key}{'CHARACTER_SET_NAME'}; + } + } $queries{$sql}= $info; } } @@ -1131,6 +1194,7 @@ print "\n"; my %dm_totals; my %vdm_totals; + my %dm_rowlen_totals; while(my ($cname, $c)= $t->columns_each()) { $c->type =~ /^([^\(]*)/g; @@ -1161,12 +1225,16 @@ printf $v, $c->dm||'N/A'; $dm_totals{$_}+=$c->dm||0; } + $dm_rowlen_totals{$_}+=$c->dm||0; } print "\n"; } printf $f.'%20s %9s %5s','','','', ''; printf $v, '--' foreach @{$t->dm_versions}; print "\n"; + printf $f.'%20s %9s %5s','Max bytes/Row','','',''; + printf $v, $dm_rowlen_totals{$_} foreach @{$r->versions}; + print "\n"; printf $f.'%20s %9s %5s','Fixed Size Columns DM/Row','','',''; printf $v, $dm_totals{$_} foreach @{$r->versions}; print "\n"; @@ -1174,6 +1242,17 @@ printf $v, $vdm_totals{$_} || 0 foreach @{$r->versions}; print "\n"; + # Warn of Table Creation Issues + # Max value for row length is 8052 + # + foreach(@{$r->versions}) + { + if ($dm_rowlen_totals{$_} > 8052) { + print "Warning: $tname tables maximum row length $dm_rowlen_totals{$_} too large to create in NDB Cluster Engine and will need refactoring\n"; + last; + } + } + # DM for Indexes print "\n\nDataMemory for Indexes:\n"; @@ -1502,6 +1581,7 @@ my %dm_totals; my %vdm_totals; + my %dm_rowlen_totals; while(my ($cname, $c)= $t->columns_each()) { $c->type =~ /^([^\(]*)/g; @@ -1525,6 +1605,7 @@ push @verinfo, $c->dm||'N/A'; $dm_totals{$_}+=$c->dm||0; } + $dm_rowlen_totals{$_}+=$c->dm||0; } print $self->tr( @@ -1538,6 +1619,13 @@ } { + my @maxbytes; + push @maxbytes, $self->b($dm_rowlen_totals{$_}) foreach @{$r->versions}; + print $self->tr($self->b('Max bytes/Row'),'','','', + @maxbytes); + + } + { my @dmtot; push @dmtot, $self->b($dm_totals{$_}) foreach @{$r->versions}; print $self->tr($self->b('Fixed Size Columns DM/Row'),'','','', @@ -1554,6 +1642,17 @@ print "\n"; + # Warn of Table Creation Issues + # Max value for row length is 8052 + # + foreach(@{$r->versions}) + { + if ($dm_rowlen_totals{$_} > 8052) { + print $self->h4("Warning: $tname tables maximum row length $dm_rowlen_totals{$_} too large to create in NDB Cluster Engine and will need refactoring"); + last; + } + } + # DM for Indexes print $self->h3('DataMemory for Indexes'); print "\n";