--- work/mysql/mysql-5.1-maint/scripts/mysqldumpslow 2007-12-14 13:55:33.000000000 -0500 +++ nilsmysqldumpslow 2008-01-09 11:43:17.000000000 -0500 @@ -1,9 +1,13 @@ -#!/usr/bin/perl +#!/usr/local/bin/perl # mysqldumpslow - parse and summarize the MySQL slow query log # Original version by Tim Bunce, sometime in 2000. # Further changes by Tim Bunce, 8th March 2001. # Handling of strings with \ and double '' by Monty 11 Aug 2001. +# Show database name by Thierry Randrianiriana 31st Jul 2006. +# inverse grep (G), rows_examined processing, IN and +# VALUES abstraction by Nils Goroll, 2006-2007 +# see http://bugs.mysql.com/bug.php?id=7414 use strict; use Getopt::Long; @@ -17,18 +21,22 @@ ); GetOptions(\%opt, - 'verbose|v+',# verbose - 'help+', # write usage info - 'debug|d+', # debug - 's=s', # what to sort by (t, at, l, al, r, ar etc) - 'r!', # reverse the sort order (largest last instead of first) - 't=i', # just show the top n queries - 'a!', # don't abstract all numbers to N and strings to 'S' - 'n=i', # abstract numbers with at least n digits within names - 'g=s', # grep: only consider stmts that include this string - 'h=s', # hostname of db server for *-slow.log filename (can be wildcard) - 'i=s', # name of server instance (if using mysql.server startup script) - 'l!', # don't subtract lock time from total time + 'verbose|v+', # verbose + 'help+', # write usage info + 'debug|d+', # debug + 's=s', # what to sort by (t, at, l, al, r, ar etc) + 'r!', # reverse the sort order (largest last instead of first) + 't=i', # just show the top n queries + 'a!', # don't abstract all numbers to N and strings to 'S' + 'ain!', # don't abstract IN (...) + 'avalues!', # don't abstract VALUES (...) + 'awhitespace!', # don't normalise arbitrary whitespace + 'n=i', # abstract numbers with at least n digits within names + 'g=s', # grep: only consider stmts that include this string + 'G=s', # inverse grep: only consider stmts that DO NOT include this string + 'h=s', # hostname of db server for *-slow.log filename (can be wildcard) + 'i=s', # name of server instance (if using mysql.server startup script) + 'l!', # don't subtract lock time from total time ) or usage("bad option"); $opt{'help'} and usage(); @@ -37,7 +45,7 @@ my $defaults = `my_print_defaults mysqld`; my $basedir = ($defaults =~ m/--basedir=(.*)/)[0] or die "Can't determine basedir from 'my_print_defaults mysqld' output: $defaults"; - warn "basedir=$basedir\n" if $opt{v}; + warn "basedir=$basedir\n" if $opt{verbose}; my $datadir = ($defaults =~ m/--datadir=(.*)/)[0]; my $slowlog = ($defaults =~ m/--log-slow-queries=(.*)/)[0]; @@ -53,7 +61,7 @@ unless grep { $_ eq $opt{i} } @instances; $datadir = ($instances =~ m/--$opt{i}-datadir=(.*)/)[0] or die "Can't determine --$opt{i}-datadir from 'my_print_defaults instances' output: $instances"; - warn "datadir=$datadir\n" if $opt{v}; + warn "datadir=$datadir\n" if $opt{verbose}; } if ( -f $slowlog ) { @@ -69,47 +77,82 @@ my @pending; my %stmt; +my $db; $/ = ";\n#"; # read entire statements using paragraph mode + while ( defined($_ = shift @pending) or defined($_ = <>) ) { - warn "[[$_]]\n" if $opt{d}; # show raw paragraph being read + warn "[[$_]]\n" if $opt{debug}; # show raw paragraph being read - my @chunks = split /^\/.*Version.*started with[\000-\377]*?Time.*Id.*Command.*Argument.*\n/m; + ## Remove the MySQL startup message and process anything which + ## is not the message itself by prepending it to @pending + my @chunks = split /\S+\s+Version.*started\s+with.*\n.*\nTime\s+Id\s+Command\s+Argument\s*\n/m; if (@chunks > 1) { unshift @pending, map { length($_) ? $_ : () } @chunks; - warn "<<".join(">>\n<<",@chunks).">>" if $opt{d}; + warn "<<".join(">>\n<<",@chunks).">>" if $opt{debug}; next; } + if(/(?:use|connect)\s+(\w+)/i) { + $db=$1; + } + s/^#? Time: \d{6}\s+\d+:\d+:\d+.*\n//; my ($user,$host) = s/^#? User\@Host:\s+(\S+)\s+\@\s+(\S+).*\n// ? ($1,$2) : ('',''); - s/^# Query_time: (\d+) Lock_time: (\d+) Rows_sent: (\d+).*\n//; - my ($t, $l, $r) = ($1, $2, $3); + # Query_time: 3 Lock_time: 0 Rows_sent: 3 Rows_examined: 532233 + s/^# Query_time: (\d+) Lock_time: (\d+) Rows_sent: (\d+) Rows_examined: (\d+).*\n//; + my ($t, $l, $r, $re) = ($1, $2, $3, $4); $t -= $l unless $opt{l}; - # remove fluff that mysqld writes to log when it (re)starts: - s!^/.*Version.*started with:.*\n!!mg; - s!^Tcp port: \d+ Unix socket: \S+\n!!mg; - s!^Time.*Id.*Command.*Argument.*\n!!mg; + ## slink@mcs.de 20071022: this should never match (see @chunks split above) + ## # remove fluff that mysqld writes to log when it (re)starts: + ## s!^/.*Version.*started with:.*\n!!mg; + ## s!^Tcp port:\s+\d+\s+Unix socket:\s+\S+\n!!mg; + ## s!^Time.*Id.*Command.*Argument.*\n!!mg; - s/^use \w+;\n//; # not consistently added + s/^(?:use|connect)\s+\w+;\n//; # not consistently added s/^SET timestamp=\d+;\n//; - s/^[ ]*\n//mg; # delete blank lines - s/^[ ]*/ /mg; # normalize leading whitespace + s/^[ \t]*\n//mg; # delete blank lines + s/^[ \t]*/ /mg; # normalize leading whitespace s/\s*;\s*(#\s*)?$//; # remove trailing semicolon(+newline-hash) next if $opt{g} and !m/$opt{g}/io; + next if $opt{G} and m/$opt{G}/io; + # abstract strings - do this before IN (...) and VALUES (...) + # to get correct results for values containg quotes unless ($opt{a}) { - s/\b\d+\b/N/g; - s/\b0x[0-9A-Fa-f]+\b/N/g; s/''/'S'/g; s/""/"S"/g; s/(\\')//g; s/(\\")//g; s/'[^']+'/'S'/g; s/"[^"]+"/"S"/g; + } + + unless ($opt{ain}) { + s/IN\s*\([^\)]*\)/IN (...)/gi; + } + + unless ($opt{avalues}) { + # this RE is not optimal (too greedy - eats everything after VALUES (),()... which ends with ) + # but the correct version of it triggers a perl bug (MCS: see CTS#58124) + ## correct code triggering bug: s/VALUES([,\s]*\([^\)]+\)){1,}/VALUES (...)/gi; + s/VALUES\s*\(.+\)/VALUES (...)/gi; + } + + unless ($opt{awhitespace}) { + ## slink@mcs.de: we can also abstract any duplicate whitespace - there is no + ## string we could destroy because strings are already reduced to S + ## in the code above - but we need to re-normalise leading whitespace again + s/\s+/ /g; + s/^[ \t]*/ /; + } + + unless ($opt{a}) { + s/\b\d+\b/N/g; + s/\b0x[0-9A-Fa-f]+\b/N/g; # -n=8: turn log_20001231 into log_NNNNNNNN s/([a-z_]+)(\d{$opt{n},})/$1.('N' x length($2))/ieg if $opt{n}; # abbreviate massive "in (...)" statements and similar @@ -121,18 +164,21 @@ $s->{t} += $t; $s->{l} += $l; $s->{r} += $r; + $s->{re} += $re; $s->{users}->{$user}++ if $user; $s->{hosts}->{$host}++ if $host; + $s->{dbs}->{$db}++ if $db; - warn "{{$_}}\n\n" if $opt{d}; # show processed statement string + warn "{{$_}}\n\n" if $opt{debug}; # show processed statement string } foreach (keys %stmt) { my $v = $stmt{$_} || die; - my ($c, $t, $l, $r) = @{ $v }{qw(c t l r)}; + my ($c, $t, $l, $r, $re) = @{ $v }{qw(c t l r re)}; $v->{at} = $t / $c; $v->{al} = $l / $c; $v->{ar} = $r / $c; + $v->{are} = $re / $c; } my @sorted = sort { $stmt{$b}->{$opt{s}} <=> $stmt{$a}->{$opt{s}} } keys %stmt; @@ -141,13 +187,15 @@ foreach (@sorted) { my $v = $stmt{$_} || die; - my ($c, $t,$at, $l,$al, $r,$ar) = @{ $v }{qw(c t at l al r ar)}; + my ($c, $t,$at, $l,$al, $r,$ar,$re,$are) = @{ $v }{qw(c t at l al r ar re are)}; my @users = keys %{$v->{users}}; my $user = (@users==1) ? $users[0] : sprintf "%dusers",scalar @users; my @hosts = keys %{$v->{hosts}}; my $host = (@hosts==1) ? $hosts[0] : sprintf "%dhosts",scalar @hosts; - printf "Count: %d Time=%.2fs (%ds) Lock=%.2fs (%ds) Rows=%.1f (%d), $user\@$host\n%s\n\n", - $c, $at,$t, $al,$l, $ar,$r, $_; + my @dbs = keys %{$v->{dbs}}; + my $db = (@dbs==1) ? $dbs[0] : sprintf "%ddbs",scalar @dbs; + printf "Count: %d Time=%.2fs (%ds) Lock=%.2fs (%ds) Rows=%.1f (%d) Rows examined=%.1f (%d), Database: %s $user\@$host\n%s\n\n", + $c, $at,$t, $al,$l, $ar,$r, $are, $re, $db, $_; } sub usage { @@ -163,12 +211,28 @@ -v verbose -d debug - -s ORDER what to sort by (t, at, l, al, r, ar etc), 'at' is default + -s ORDER what to sort by, 'at' is default + + absolute values: + c : Query count + t : Query time + l : Lock time + r : Rows sent + re : Rows examined + + for average values (average per query), prepend an a (except for c) + at, al, ar, are + -r reverse the sort order (largest last instead of first) -t NUM just show the top n queries -a don't abstract all numbers to N and strings to 'S' + -ain don't abstract IN (something) to 'IN (...)' + -avalues don't abstract VALUES (Something) to 'VALUES (...)' + -awhitespace don't normalise arbitrary whitespace (preserve original formatting) + note: leading whitespace is normalised always -n NUM abstract numbers with at least n digits within names -g PATTERN grep: only consider stmts that include this string + -G PATTERN inverse grep: only consider stmts that do not include this string -h HOSTNAME hostname of db server for *-slow.log filename (can be wildcard), default is '*', i.e. match all -i NAME name of server instance (if using mysql.server startup script)