#!/usr/bin/perl
use strict;
use warnings;
use DBI;

use Getopt::Long;
$Getopt::Long::ignorecase=0;

print "mysql_explain_slow_log\n";
print "======================\n";

my $Param={};

$Param->{host}='';
$Param->{user}='';
$Param->{password}='';
$Param->{PrintError}=0;
$Param->{socket}='';

if (!GetOptions ('date|d:i' => \$Param->{ViewDate},
                 'host|h:s' => \$Param->{host},
                 'user|u:s' => \$Param->{user},
                 'password|p:s' => \$Param->{password},
                 'printerror|e:s' => \$Param->{PrintError},
                 'socket|s:s' => \$Param->{socket},
                )) {
  ShowOptions();
}
else {
  $Param->{UpdateCount} = 0;
  $Param->{SelectCount} = 0;
  $Param->{LoadCount} = 0;
  $Param->{IdxUseCount} = 0;
  $Param->{LineCount} = 0;

  $Param->{QueryCount} = 0;

  $Param->{Query} = undef;
  $Param->{ALL} = undef ;
  $Param->{Comment} = undef ;

  @{$Param->{Rows}} = (qw|possible_keys key type|);

  if ($Param->{ViewDate}) {
    $Param->{View} = 0;
  }
  else {
    $Param->{View} = 1;
  }

  #print "Date=$Param->{ViewDate}, host=$Param->{host}, user=$Param->{user}, password=$Param->{password}\n";

  $Param->{dbh}=DBI->connect("DBI:mysql:host=$Param->{host}".($Param->{socket}?";mysql_socket=$Param->{socket}":""),$Param->{user},$Param->{password},{PrintError=>0});
  if (DBI::err()) {
    print "Error: " . DBI::errstr() . "\n";
  }
  else {
    $Param->{Start} = time;
    while(<>) {
      $Param->{LineCount}++;

      if ($Param->{ViewDate} ) {
        if (m/^# Time: (\d{6})\s+\d{1,2}:\d\d:\d\d\s.*$/) { # get date
          #print "# $1 #\n";
          if ($1 == $Param->{ViewDate}) {
            $Param->{View} = 1;
          }
          else {
            $Param->{View} = 0;
          }
        }
      }

      if ($Param->{View} ) {
        #print "->>>$_";

        if (m/^use\s+(.*);$/) { # get database
          #print "use $1\n";
          if (defined $1) {
            $Param->{DB} = $1 ;
          }
        }

        elsif (m/^(select.*);$/i) { # get SELECT query
          #print "S-$1\n";
          unless ($Param->{DB}) {
            #print "Error: No Database for query $1 found\n";
          }
          else {
            my $s = "$1";
            $Param->{Query}="EXPLAIN $s";
            $Param->{Query} =~ s/from\s/from $Param->{DB}./i;
            RunQuery($Param);
          }
        }

        elsif (m/^update\s+(\w+).+(where.+);$/i) { # get UPDATE query
          #print "U-update $1 where $2\n";
          unless ($Param->{DB}) {
            #print "Error: No Database for query: update $1 where $2 found\n";
          }
          else {
            $Param->{Query} ="EXPLAIN SELECT * FROM $1 $2";
            $Param->{Query} =~ s/from\s/from $Param->{DB}./i;
            RunQuery($Param);
          }
        }

        elsif (m/load/i) { # count load
          $Param->{LoadCount}++;
        }
      }
    }

    $Param->{dbh}->disconnect();

    print "\nIndex usage ------------------------------------\n";
    foreach my $t (sort keys %{$Param->{Data}}) {
      print "\nTable\t$t: ---\n";
      foreach my $k (sort keys %{$Param->{Data}->{$t}}) {
        print " count\t$k:\n";
        my %h = %{$Param->{Data}->{$t}->{$k}};
          foreach (sort {$h{$a} <=> $h{$b}} keys %h) {
          print "  $Param->{Data}->{$t}->{$k}->{$_}\t$_\n";
        }
      }
    }

    $Param->{AllCount}=0;
    print "\nQueries causing table scans -------------------\n\n";
    foreach (@{$Param->{ALL}}) {
      $Param->{AllCount} ++;
      print "$_\n";
    }
    print "Sum: $Param->{AllCount} table scans\n";

    print "\nSummary ---------------------------------------\n\n";
    print "Select: \t$Param->{SelectCount} queries\n";
    print "Update: \t$Param->{UpdateCount} queries\n";
    print "Load: \t$Param->{LoadCount} queries\n";
    print "\n";

    print "Logfile: \t$Param->{LineCount} lines\n";
    print "Started:  \t".localtime($Param->{Start})."\n";
    print "Finished:   \t".localtime(time)."\n";

  }
}

sub RunQuery {
  my $Param = shift ;

  if (defined $Param->{Query}) {
    if (defined $Param->{DB} ) {
      $Param->{Query} =~ m/from\s+(\w+[.]\w+|\w+)/i;
      $Param->{tab} = $1;
      #print "||$Param->{tab} -- $Param->{Query}\n";

      my $sth=$Param->{dbh}->prepare($Param->{Query});
      if (DBI::err()) {
        if ($Param->{PrintError}) {print "Error: ".DBI::errstr()."\n";}
      }
      else {
        #print "$Param->{Query}\n";
        $sth->execute();
        if (DBI::err()) {
          if ($Param->{PrintError}) {
            print "[$Param->{LineCount}]<<$Param->{Query}>>\n";
            print "Error: ".DBI::errstr()."\n";
          }
        }
        else {
          my $row = undef;
          while ($row = $sth->fetchrow_hashref()) {
            $Param->{SelectCount} ++;

            if (defined $row->{Comment}) {
              push (@{$Param->{Comment}}, "$row->{Comment}; $_; $Param->{DB}; $Param->{Query}");
            }
            foreach (@{$Param->{Rows}}) {
              if (defined $row->{$_}) {
                #if (($_ eq 'type' ) and ($row->{$_} eq 'ALL')) {
                if ($row->{type} eq 'ALL') {
                  push (@{$Param->{ALL}}, "$Param->{Query}");
                  #print ">> $row->{$_} $_ $Param->{DB} $Param->{Query}\n";
                }
                $Param->{IdxUseCount} ++;
                $Param->{Data}->{$Param->{tab}}->{$_}->{$row->{$_}} ++;
              }
            }
          }
        }
      }
      $sth->finish();
    }
    $Param->{Query} = undef ;
  }
}

sub ShowOptions {
  print <<EOF;
Usage: $0 [OPTIONS] < LOGFILE

--date=YYMMDD       select only entrys of date
-d YYMMDD
--host=HOSTNAME     db-host to ask
-h HOSTNAME
--user=USERNAME     db-user
-u USERNAME
--password=PASSWORD password of db-user
-p PASSWORD
--socket=SOCKET     mysqld socket file to connect
-s SOCKET
--printerror=1		enable error output
-e 1

Read logfile from STDIN an try to EXPLAIN all SELECT statements. All UPDATE statements are rewritten to an EXPLAIN SELECT statement. The results of the EXPLAIN statement are collected and counted. All results with type=ALL are collected in an separete list. Results are printed to STDOUT.

EOF
}

1;

__END__

=pod

=head1 NAME

mysql_explain_slow_log

Feed a mysqld slow log back into mysql
and collect statistics about index usage with EXPLAIN.

=head1 DISCUSSION

To optimize your indices, you have to know which ones are actually
used and what kind of queries are causing table scans. Especially
if you are generating your queries dynamically and you have a huge
amount of queries going on, this isn't easy.

Use this tool to take a look at the effects of your real life queries.
Then add indices to avoid table scans and remove those which aren't used.

=head1 USAGE

mysql_explain_slow_log [--date=YYMMDD] --host=dbhost] [--user=dbuser] [--password=dbpw] [--socket=/path/to/socket] [--printerror=1] < logfile

--date=YYMMDD       select only entrys of date

-d YYMMDD

--host=HOSTNAME     db-host to ask

-h HOSTNAME

--user=USERNAME     db-user

-u USERNAME

--password=PASSWORD password of db-user

-p PASSWORD

--socket=SOCKET     change path to the socket

-s SOCKET

--printerror=1		enable error output

-e 1

=head1 EXAMPLE

mysql_explain_slow_log --host=localhost --user=foo --password=bar < /var/lib/mysql/slow.log

=head1 AUTHORS

  Jan Willamowius <jan@willamowius.de>, http://www.willamowius.de/perl.html

=head1 SEE ALSO

mysql_explain_log documentation

=cut
