Bug #27199 Performance problem when query contains a left join against a subquery
Submitted: 16 Mar 2007 5:03 Modified: 9 Sep 2013 9:28
Reporter: Stephen Gornick Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S2 (Serious)
Version:5.0.22, 5.0.37, 4.1 BK, 5.1 BK OS:Linux (linux)
Assigned to: Assigned Account CPU Architecture:Any
Tags: 100%, 5.0.22, 5.0.37, cpu, derived, derived table, join, left, left join, MAX, maxed, Optimize, optimized, Optimizer, outer, OUTER JOIN, peg, pegged, subquery, table

[16 Mar 2007 5:03] Stephen Gornick
Description:
A query that includes a left join against a subquery performs terribly (many minutes instead of the fraction of a second expected.)

How to repeat:
CREATE SCHEMA test;
DROP TABLE IF EXISTS test.parts;
CREATE TABLE test.parts (
  id INT UNSIGNED PRIMARY KEY AUTO_INCREMENT
) ENGINE=MyISAM;

-- Generate 65,536 test records, 
INSERT INTO test.parts values (0);
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;
INSERT INTO test.parts SELECT NULL FROM test.parts;

DROP TABLE IF EXISTS test.parts2;
CREATE TABLE test.parts2 (
  id INT UNSIGNED PRIMARY KEY AUTO_INCREMENT
) ENGINE=MyISAM;

-- Generate 65,536 test records, 
INSERT INTO test.parts2 SELECT * FROM test.parts;

SELECT
  COUNT(*)
FROM
  test.parts
LEFT JOIN
( SELECT
    parts2.id
  FROM
    test.parts2
) subquery
ON (
  subquery.id = parts.id
);

Instead of taking 2 seconds, the cpu will stay pegged at 100% for many minutes, or longer, before producing the result set.
[16 Mar 2007 5:12] Stephen Gornick
Bug #25926 might be the same problem.
[16 Mar 2007 9:02] Sveta Smirnova
Thank you for the report.

Verified as described.
[21 Mar 2007 16:29] Igor Babaev
The query uses a derived table which is always materialized now.
This problem will be fixed in future versions when derived tables
will be handled as unnamed views.
[6 Sep 2013 16:10] Jeffory Orrok
I have a fairly decent system with a multi-core i7 and 24GB of RAM, and the following queries never completed even after 24 hours of execution (also check out the explain at the very end where it seems to have gotten confused)

drop table if exists voter_id_2012;
create table voter_id_2012 select distinct VOTER_ID from VoterHistory2012;
alter table voter_id_2012 add index(VOTER_ID);

drop table if exists voting_profile_8_10_12;
create table voting_profile_8_10_12
select v.VOTER_ID, 
pri2008,
gen2008,
pri2010,
gen2010,
pri2012,
gen2012

from voter_id_2012 v 
left join (select VOTER_ID, 1 as pri2008 from VoterHistory2012 p8 where p8.z_YEAR=2008 and  p8.z_MONTH=8 order by VOTER_ID) as pri8 using(VOTER_ID)
left join (select VOTER_ID, 1 as gen2008 from VoterHistory2012 g8 where g8.z_YEAR=2008 and  g8.z_MONTH=11 order by VOTER_ID) as gen8 using(VOTER_ID)
left join (select VOTER_ID, 1 as pri2010 from VoterHistory2012 p10 where p10.z_YEAR=2010 and  p10.z_MONTH=8 order by VOTER_ID) as pri10 using(VOTER_ID)
left join (select VOTER_ID, 1 as gen2010 from VoterHistory2012 g10 where g10.z_YEAR=2010 and  g10.z_MONTH=11 order by VOTER_ID) as gen10 using(VOTER_ID)
left join (select VOTER_ID, 1 as pri2012 from VoterHistory2012 p12 where p12.z_YEAR=2012 and  p12.z_MONTH=6 order by VOTER_ID) as pri12 using(VOTER_ID)
left join (select VOTER_ID, 1 as gen2012 from VoterHistory2012 g12 where g12.z_YEAR=2012 and  g12.z_MONTH=11 order by VOTER_ID) as gen12 using(VOTER_ID)
;

update voting_profile_8_10_12
set pri2008=if(pri2008 is null, 0, 1),
set gen2008=if(gen2008 is null, 0, 1),
set pri2010=if(pri2010 is null, 0, 1),
set gen2010=if(gen2010 is null, 0, 1),
set pri2012=if(pri2012 is null, 0, 1),
set gen2012=if(gen2012 is null, 0, 1)
;

alter table voting_profile_8_10_12
add index(VOTER_ID),
add index(pri2008),
add index(gen2008),
add index(pri2010),
add index(gen2010),
add index(pri2012),
add index(gen2012)
;

### output from explain:

mysql> explain select v.VOTER_ID,
    -> pri2008,
    -> gen2008,
    -> pri2010,
    -> gen2010,
    -> pri2012,
    -> gen2012
    ->
    -> from voter_id_2012 v
    -> left join (select VOTER_ID, 1 as pri2008 from VoterHistory2012 p8 where p8.z_YEAR=2008 and  p8.z_MONTH=8 order by VOTER_ID) as pri8 using(VOTER_ID)
    -> left join (select VOTER_ID, 1 as gen2008 from VoterHistory2012 g8 where g8.z_YEAR=2008 and  g8.z_MONTH=11 order by VOTER_ID) as gen8 using(VOTER_ID)
    -> left join (select VOTER_ID, 1 as pri2010 from VoterHistory2012 p10 where p10.z_YEAR=2010 and  p10.z_MONTH=8 order by VOTER_ID) as pri10 using(VOTER_ID)
    -> left join (select VOTER_ID, 1 as gen2010 from VoterHistory2012 g10 where g10.z_YEAR=2010 and  g10.z_MONTH=11 order by VOTER_ID) as gen10 using(VOTER_ID)
    -> left join (select VOTER_ID, 1 as pri2012 from VoterHistory2012 p12 where p12.z_YEAR=2012 and  p12.z_MONTH=6 order by VOTER_ID) as pri12 using(VOTER_ID)
    -> left join (select VOTER_ID, 1 as gen2012 from VoterHistory2012 g12 where g12.z_YEAR=2012 and  g12.z_MONTH=11 order by VOTER_ID) as gen12 using(VOTER_ID)
    -> ;
+----+-------------+------------+-------+----------------+----------+---------+------+----------+----------------+
| id | select_type | table      | type  | possible_keys  | key      | key_len | ref  | rows     | Extra          |
+----+-------------+------------+-------+----------------+----------+---------+------+----------+----------------+
|  1 | PRIMARY     | v          | index | NULL           | VOTER_ID | 12      | NULL |  3186737 | Using index    |
|  1 | PRIMARY     | <derived2> | ALL   | NULL           | NULL     | NULL    | NULL |   457001 |                |
|  1 | PRIMARY     | <derived3> | ALL   | NULL           | NULL     | NULL    | NULL |  2310512 |                |
|  1 | PRIMARY     | <derived4> | ALL   | NULL           | NULL     | NULL    | NULL |   747818 |                |
|  1 | PRIMARY     | <derived5> | ALL   | NULL           | NULL     | NULL    | NULL |  1775961 |                |
|  1 | PRIMARY     | <derived6> | ALL   | NULL           | NULL     | NULL    | NULL |   528612 |                |
|  1 | PRIMARY     | <derived7> | ALL   | NULL           | NULL     | NULL    | NULL |  2594114 |                |
|  7 | DERIVED     | g12        | ALL   | z_YEAR,z_MONTH | z_YEAR   | 4       |      | 28072159 | Using filesort |
|  6 | DERIVED     | p12        | ALL   | z_YEAR,z_MONTH | z_MONTH  | 4       |      | 28072159 | Using filesort |
|  5 | DERIVED     | g10        | ALL   | z_YEAR,z_MONTH | z_YEAR   | 4       |      | 28072159 | Using filesort |
|  4 | DERIVED     | p10        | ALL   | z_YEAR,z_MONTH | NULL     | NULL    | NULL | 28072159 | Using filesort |
|  3 | DERIVED     | g8         | ALL   | z_YEAR,z_MONTH | z_YEAR   | 4       |      | 28072159 | Using filesort |
|  2 | DERIVED     | p8         | ALL   | z_YEAR,z_MONTH | NULL     | NULL    | NULL | 28072159 | Using filesort |
+----+-------------+------------+-------+----------------+----------+---------+------+----------+----------------+
13 rows in set (23.29 sec)

### composition of VoterHistory2012

mysql> describe VoterHistory2012;
+----------------------+-------------+------+-----+---------+-------+
| Field                | Type        | Null | Key | Default | Extra |
+----------------------+-------------+------+-----+---------+-------+
| VOTER_ID             | varchar(9)  | YES  | MUL | NULL    |       |
| ELECTION_TYPE        | varchar(17) | YES  | MUL | NULL    |       |
| ELECTION_DATE        | date        | YES  | MUL | NULL    |       |
| ELECTION_DESCRIPTION | varchar(57) | YES  | MUL | NULL    |       |
| VOTING_METHOD        | varchar(18) | YES  | MUL | NULL    |       |
| PARTY                | varchar(21) | YES  | MUL | NULL    |       |
| COUNTY_NAME          | varchar(11) | YES  | MUL | NULL    |       |
| z_YEAR               | int(11)     | NO   | MUL | NULL    |       |
| z_MONTH              | int(11)     | NO   | MUL | NULL    |       |
+----------------------+-------------+------+-----+---------+-------+
9 rows in set (0.00 sec)
[9 Sep 2013 9:28] Øystein Grøvlen
This is fixed in 5.6 which provides indexes for materialized derived tables.