Bug #60325 partition optimization
Submitted: 3 Mar 2011 21:29 Modified: 12 Mar 2011 10:37
Reporter: Roberto Spadim (Basic Quality Contributor) Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Partitions Severity:S3 (Non-critical)
Version:5.5, 5.1.57, 5.6.3 OS:Any
Assigned to: CPU Architecture:Any
Tags: partition, qc

[3 Mar 2011 21:29] Roberto Spadim
Description:
hi guys, could we optimize group by with partition fields?

for example

create table () partition by list (partition_field)
partition 1 values 1,2,3
partition 2 values 4,5,6
...

and a query like:
select partition_field,count(*) from table group by partition_field
could return fast like a select count(*) without partition

thanks

How to repeat:
create a table
partition by list  (one field)

insert many values

select count(*) from table group by partition_field
[4 Mar 2011 1:29] Roberto Spadim
check this:
----- create table:
CREATE TABLE `est_mov` (
  `unidade_id` int(20) NOT NULL DEFAULT '0',
  `lote_tipo` enum('v','c','i') NOT NULL DEFAULT 'v',
  `lote_spa` decimal(10,1) NOT NULL DEFAULT '0.0',
  `item_id` int(20) NOT NULL DEFAULT '0',
  `item_id_red` int(20) NOT NULL DEFAULT '0',
  `mov_id` char(30) NOT NULL DEFAULT '0',
  `oe_tipo` enum('op','oe','tr','ex','bx','bx-tr','rp-tr') NOT NULL DEFAULT 'oe',
  `oe` bigint(20) NOT NULL DEFAULT '0',
  `oe_seq` int(20) NOT NULL DEFAULT '0',
  `table_partition` int(11) NOT NULL DEFAULT '0',
  `lote_data` date NOT NULL DEFAULT '0000-00-00',
  `lote` bigint(20) NOT NULL DEFAULT '0',
  `lote_sif_data` date NOT NULL DEFAULT '0000-00-00',
  `lote_sif` int(10) unsigned NOT NULL DEFAULT '0',
  `estoque_entrada_un` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_org` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_org_red` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_mapa` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_mapa_red` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_pess_tipo` enum('f','j') NOT NULL DEFAULT 'f',
  `estoque_entrada_pess_id` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_propriedade` int(20) NOT NULL DEFAULT '0',
  `estoque_entrada_proprietario_tipo` enum('f','j') NOT NULL DEFAULT 'f',
  `estoque_entrada_proprietario_id` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_un` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_org` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_org_red` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_mapa` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_mapa_red` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_pess_tipo` enum('f','j') NOT NULL DEFAULT 'f',
  `estoque_saida_pess_id` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_propriedade` int(20) NOT NULL DEFAULT '0',
  `estoque_saida_proprietario_tipo` enum('f','j') NOT NULL DEFAULT 'f',
  `estoque_saida_proprietario_id` int(20) NOT NULL DEFAULT '0',
  `quant_mov_un` char(5) NOT NULL DEFAULT 'un',
  `quant` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `pecas` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `pbruto` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `pliq` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `vbruto` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `vliq` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `data_etiqueta` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `data_producao` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `data_processamento` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `data_validade` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `data_estoque` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `data_estoque_origem` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `data_estoque_destino` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
  `rastreabilidade_interna` char(25) NOT NULL DEFAULT '',
  `rastreabilidade_caixa` char(25) NOT NULL DEFAULT '',
  `rastreabilidade_externa` char(25) NOT NULL DEFAULT '',
  `rastreabilidade_pallet` char(25) NOT NULL DEFAULT '',
  `externa_tipo` char(25) NOT NULL DEFAULT '',
  `valor_bruto_total` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `valor_liq_total` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `custo` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `custo_medio_anterior` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `custo_medio_apos` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `cfop_id` int(20) NOT NULL DEFAULT '0',
  `cfop_id_red` int(20) NOT NULL DEFAULT '0',
  `contrato_producao` char(25) NOT NULL DEFAULT '',
  `quant_real` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `pecas_real` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `pliq_real` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `pbruto_real` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `vliq_real` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `vbruto_real` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `usuario` char(25) NOT NULL DEFAULT '',
  `sessao` char(41) NOT NULL DEFAULT '',
  `lote_rendimento_data` date NOT NULL DEFAULT '0000-00-00',
  `lote_rendimento_numero` int(20) NOT NULL DEFAULT '0',
  `ponto_movimentacao` int(20) NOT NULL DEFAULT '0',
  `rendimento` enum('Y','N') NOT NULL DEFAULT 'N',
  `custo_rendimento` enum('Y','N') NOT NULL DEFAULT 'N',
  `custo_anterior_rendimento` decimal(17,5) NOT NULL DEFAULT '0.00000',
  `operacao` int(10) unsigned NOT NULL DEFAULT '0',
  `data_estoque_ts` decimal(21,7) NOT NULL DEFAULT '0.0000000',
  `codigo_vinculo_crossdocking` decimal(20,7) unsigned NOT NULL DEFAULT '0.0000000',
  `crossdock_unidade_id` int(11) NOT NULL DEFAULT '0',
  `crossdock_lote_tipo` enum('v','c','i') NOT NULL DEFAULT 'v',
  `crossdock_lote_spa` decimal(10,1) NOT NULL DEFAULT '0.0',
  `crossdock_oe` int(11) NOT NULL DEFAULT '0',
  `crossdock_oe_seq` int(11) NOT NULL DEFAULT '0',
  PRIMARY KEY (`unidade_id`,`lote_tipo`,`lote_spa`,`item_id`,`item_id_red`,`mov_id`,`oe_tipo`,`oe`,`oe_seq`,`table_partition`),
  KEY `lote` (`unidade_id`,`lote_tipo`,`lote_spa`,`item_id`,`item_id_red`,`mov_id`,`oe_tipo`,`oe`,`oe_seq`),
  KEY `oe` (`unidade_id`,`oe_tipo`,`oe`,`oe_seq`,`item_id`,`item_id_red`),
  KEY `cfop` (`cfop_id`,`cfop_id_red`),
  KEY `item` (`item_id`,`item_id_red`,`data_estoque`),
  KEY `ponto_movimentacao` (`unidade_id`,`oe_tipo`,`ponto_movimentacao`,`usuario`,`data_estoque`,`item_id`,`item_id_red`),
  KEY `rastro_mov_id` (`mov_id`,`data_estoque`),
  KEY `rendimento_transfs` (`unidade_id`,`item_id`,`oe_tipo`,`data_estoque`,`oe`,`ponto_movimentacao`),
  KEY `transferencias` (`item_id`,`data_estoque`,`oe_tipo`,`item_id_red`),
  KEY `data_estoque` (`data_estoque`),
  KEY `ponto2` (`unidade_id`,`oe_tipo`,`ponto_movimentacao`,`data_estoque`,`item_id`,`item_id_red`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
/*!50100 PARTITION BY LIST (table_partition)
(PARTITION atual VALUES IN (0) ENGINE = MyISAM,
 PARTITION historico VALUES IN (1) ENGINE = MyISAM) */

----- select times: 16125182 rows
==============
select count(*) from est_mov;
/* 0 rows affected, 1 rows found. Duration for 1 query: 3,650 sec. */

==============
select table_partition,count(*) from est_mov group by table_partition;
/* 0 rows affected, 2 rows found. Duration for 1 query: 195,485 sec. */

"table_partition","count(*)"
"0","8501418"
"1","7623764"
==============

  3,650 SECONDS
vs
195,485 SECONDS!!!!!
check partial, select count(*) from partitions...

===
select count(*) from est_mov where table_partition=0;
/* 0 rows affected, 1 rows found. Duration for 1 query: 16,708 sec. */
select count(*) from est_mov where table_partition=1;
/* 0 rows affected, 1 rows found. Duration for 1 query: 21,637 sec. */
===
i was specting at least 16,708+21,637=38,345 seconds for count(*) group by table partition...
[12 Mar 2011 10:37] Sveta Smirnova
Thank you for the report.

Verified as described. Strictly this is feature request.

Test case for MTR:

create table t1(
id int not null auto_increment,
f1 int,
primary key(id, f1)
) engine=myisam partition by list(f1)
(partition first values in (1,2,3) engine=myisam,
partition second values in (4,5,6) engine=myisam);

insert into t1 (f1) values(1),(2),(3),(4),(5),(6);
insert into t1 (f1) values(1),(2),(3),(4),(5),(6);
insert into t1 (f1) values(1),(2),(3),(4),(5),(6);
insert into t1 (f1) values(1),(2),(3),(4),(5),(6);
insert into t1 (f1) values(1),(2),(3),(4),(5),(6);
insert into t1 (f1) values(1),(2),(3),(4),(5),(6);

flush status;
show status like 'ha_%';
select count(*) from t1;
show status like 'ha_%';
select count(*) from t1 group by f1;
show status like 'ha_%';

Result:

flush status;
show status like 'ha_%';
Variable_name   Value
Handler_commit  0
Handler_delete  0
Handler_discover        0
Handler_external_lock   0
Handler_mrr_init        0
Handler_prepare 0
Handler_read_first      0
Handler_read_key        0
Handler_read_last       0
Handler_read_next       0
Handler_read_prev       0
Handler_read_rnd        0
Handler_read_rnd_next   0
Handler_rollback        0
Handler_savepoint       0
Handler_savepoint_rollback      0
Handler_update  0
Handler_write   0
select count(*) from t1;
count(*)
36
show status like 'ha_%';
Variable_name   Value
Handler_commit  0
Handler_delete  0
Handler_discover        0
Handler_external_lock   6
Handler_mrr_init        0
Handler_prepare 0
Handler_read_first      0
Handler_read_key        0
Handler_read_last       0
Handler_read_next       0
Handler_read_prev       0
Handler_read_rnd        0
Handler_read_rnd_next   0
Handler_rollback        0
Handler_savepoint       0
Handler_savepoint_rollback      0
Handler_update  0
Handler_write   0
select count(*) from t1 group by f1;
count(*)
6
6
6
6
6
6
show status like 'ha_%';
Variable_name   Value
Handler_commit  0
Handler_delete  0
Handler_discover        0
Handler_external_lock   12
Handler_mrr_init        0
Handler_prepare 0
Handler_read_first      2
Handler_read_key        36
Handler_read_last       0
Handler_read_next       36
Handler_read_prev       0
Handler_read_rnd        6
Handler_read_rnd_next   7
Handler_rollback        0
Handler_savepoint       0
Handler_savepoint_rollback      0
Handler_update  30
Handler_write   6
[25 Apr 2013 11:58] Mattias Jonsson
If there are multiple list values in a partition, that partition would still need to read all values and do a group by!

Only if there is exactly one list value (PARTITION p0 VALUES IN (0)) it could use this optimization.