MySQL Bugs: #87947: Optimizer chooses ref over range when access when range access is faster

Bug #87947	Optimizer chooses ref over range when access when range access is faster
Submitted:	3 Oct 2017 4:00	Modified:	9 Oct 2017 5:36
Reporter:	Jaime Sicam	Email Updates:
Status:	Verified	Impact on me:	None
Category:	MySQL Server: Optimizer	Severity:	S5 (Performance)
Version:	5.7.19	OS:	Any
Assigned to:		CPU Architecture:	Any

Description:
This is similar to https://bugs.mysql.com/bug.php?id=26966 but I have a test case here for your review.

Table structure:
CREATE TABLE `t2` (
  `a` int(11) NOT NULL AUTO_INCREMENT,
  `b` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `c` int(11) DEFAULT NULL,
  PRIMARY KEY (`a`),
  KEY `b` (`b`),
  KEY `c` (`c`)
) ENGINE=InnoDB AUTO_INCREMENT=20471 DEFAULT CHARSET=latin1

As you can see here, it's preferred index c over b:
explain format=json select count(*) from t2 where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1;

 {
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "1093.00"
    },
    "table": {
      "table_name": "t2",
      "access_type": "ref",
      "possible_keys": [
        "b",
        "c"
      ],
      "key": "c",
      "used_key_parts": [
        "c"
      ],
      "key_length": "5",
      "ref": [
        "const"
      ],
      "rows_examined_per_scan": 5120,
      "rows_produced_per_join": 507,
      "filtered": "9.91",
      "cost_info": {
        "read_cost": "69.00",
        "eval_cost": "101.44",
        "prefix_cost": "1093.00",
        "data_read_per_join": "7K"
      },
      "used_columns": [
        "b",
        "c"
      ],
      "attached_condition": "(`test`.`t2`.`b` between '2017-10-03 11:40:09' and '2017-10-03 11:40:10')"
    }
  }
} 

Using force shows that using b index is costlier(1434.61 vs 1093.00):

explain format=json select count(*) from t2 force index(b) where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1;

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "1434.61"
    },
    "table": {
      "table_name": "t2",
      "access_type": "range",
      "possible_keys": [
        "b"
      ],
      "key": "b",
      "used_key_parts": [
        "b"
      ],
      "key_length": "4",
      "rows_examined_per_scan": 1024,
      "rows_produced_per_join": 0,
      "filtered": "0.01",
      "index_condition": "(`test`.`t2`.`b` between '2017-10-03 11:40:09' and '2017-10-03 11:40:10')",
      "cost_info": {
        "read_cost": "1434.59",
        "eval_cost": "0.02",
        "prefix_cost": "1434.61",
        "data_read_per_join": "1"
      },
      "used_columns": [
        "b",
        "c"
      ],
      "attached_condition": "(`test`.`t2`.`c` = 1)"
    }
  }
}

But in terms of query speed, using range performs much faster:
mysql> show profiles;
+----------+------------+-----------------------------------------------------------------------------------------------------------------+
| Query_ID | Duration   | Query                                                                                                           |
+----------+------------+-----------------------------------------------------------------------------------------------------------------+
|        1 | 0.00768700 | select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        2 | 0.00722625 | select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        3 | 0.00784675 | select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        4 | 0.01503375 | select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1                |
|        5 | 0.01557875 | select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1                |
|        6 | 0.01523025 | select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1                |
|        7 | 0.01354375 | select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        8 | 0.01504275 | select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        9 | 0.01492250 | select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
+----------+------------+-----------------------------------------------------------------------------------------------------------------+
9 rows in set, 1 warning (0.00 sec)

How to repeat:
The table dump has been provided. Please run:
SET PROFILING=1;

select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 ;
select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 ;
select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 ;
select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 ;
select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 ;
select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 ;
select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1;
select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1;
select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1;

Then, run SHOW PROFILES;

test data

Attachment: t2.sql (application/octet-stream, text), 315.82 KiB.

Hello Jaime Sicam,

Thank you for the report.
With the provided test case I can see that "range" is faster than "ref" but I'm not seeing that optimize is choosing "ref" here. Could you please provide complete dump if earlier provided is partial?

-- instance running with default settings

root@localhost [test]> show profiles;
+----------+------------+-----------------------------------------------------------------------------------------------------------------+
| Query_ID | Duration   | Query                                                                                                           |
+----------+------------+-----------------------------------------------------------------------------------------------------------------+
|        1 | 0.00078225 | select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        2 | 0.00018025 | select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        3 | 0.00018150 | select count(*) from t2 force index(b)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        4 | 0.00019250 | select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1                |
|        5 | 0.00016725 | select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1                |
|        6 | 0.00017150 | select count(*) from t2  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1                |
|        7 | 0.00939725 | select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        8 | 0.00725875 | select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
|        9 | 0.00731375 | select count(*) from t2 force index(c)  where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1 |
+----------+------------+-----------------------------------------------------------------------------------------------------------------+
9 rows in set, 1 warning (0.00 sec)

root@localhost [test]> explain format=json select count(*) from t2 where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1\G
*************************** 1. row ***************************
EXPLAIN: {
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "2.41"
    },
    "table": {
      "table_name": "t2",
      "access_type": "range",
      "possible_keys": [
        "b",
        "c"
      ],
      "key": "b",
      "used_key_parts": [
        "b"
      ],
      "key_length": "4",
      "rows_examined_per_scan": 1,
      "rows_produced_per_join": 0,
      "filtered": "49.53",
      "index_condition": "(`test`.`t2`.`b` between '2017-10-03 11:40:09' and '2017-10-03 11:40:10')",
      "cost_info": {
        "read_cost": "2.31",
        "eval_cost": "0.10",
        "prefix_cost": "2.41",
        "data_read_per_join": "7"
      },
      "used_columns": [
        "b",
        "c"
      ],
      "attached_condition": "(`test`.`t2`.`c` = 1)"
    }
  }
}
1 row in set, 1 warning (0.01 sec)

root@localhost [test]> explain format=json select count(*) from t2 force index(b) where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1\G
*************************** 1. row ***************************
EXPLAIN: {
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "2.41"
    },
    "table": {
      "table_name": "t2",
      "access_type": "range",
      "possible_keys": [
        "b"
      ],
      "key": "b",
      "used_key_parts": [
        "b"
      ],
      "key_length": "4",
      "rows_examined_per_scan": 1,
      "rows_produced_per_join": 0,
      "filtered": "50.00",
      "index_condition": "(`test`.`t2`.`b` between '2017-10-03 11:40:09' and '2017-10-03 11:40:10')",
      "cost_info": {
        "read_cost": "2.31",
        "eval_cost": "0.10",
        "prefix_cost": "2.41",
        "data_read_per_join": "8"
      },
      "used_columns": [
        "b",
        "c"
      ],
      "attached_condition": "(`test`.`t2`.`c` = 1)"
    }
  }
}
1 row in set, 1 warning (0.01 sec)

root@localhost [test]> explain format=json select count(*) from t2 force index(c) where b between '2017-10-03 11:40:09' and '2017-10-03 11:40:10' and c=1\G
*************************** 1. row ***************************
EXPLAIN: {
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "1093.00"
    },
    "table": {
      "table_name": "t2",
      "access_type": "ref",
      "possible_keys": [
        "c"
      ],
      "key": "c",
      "used_key_parts": [
        "c"
      ],
      "key_length": "5",
      "ref": [
        "const"
      ],
      "rows_examined_per_scan": 5120,
      "rows_produced_per_join": 568,
      "filtered": "11.11",
      "cost_info": {
        "read_cost": "69.00",
        "eval_cost": "113.77",
        "prefix_cost": "1093.00",
        "data_read_per_join": "8K"
      },
      "used_columns": [
        "b",
        "c"
      ],
      "attached_condition": "(`test`.`t2`.`b` between '2017-10-03 11:40:09' and '2017-10-03 11:40:10')"
    }
  }
}

Thanks,
Umesh

Hi Umesh,
  I added another test and I hope this is repeatable on your end. So, please dump the new data and then follow these steps:

1. Add 3 seconds to the minimum timestamp and store @a and @b.

mysql> SELECT @a:=TIMESTAMPADD(SECOND,3,MIN(b)), @b:=TIMESTAMPADD(SECOND,3,MIN(b)) FROM t2;
+-----------------------------------+-----------------------------------+
| @a:=TIMESTAMPADD(SECOND,3,MIN(b)) | @b:=TIMESTAMPADD(SECOND,3,MIN(b)) |
+-----------------------------------+-----------------------------------+
| 2017-10-08 04:41:59               | 2017-10-08 04:41:59               |
+-----------------------------------+-----------------------------------+
1 row in set (0.00 sec)

2. Add 1 second to @b

mysql> SELECT @b:=TIMESTAMPADD(SECOND,1,@b);
+-------------------------------+
| @b:=TIMESTAMPADD(SECOND,1,@b) |
+-------------------------------+
| 2017-10-08 04:42:00           |
+-------------------------------+
1 row in set (0.00 sec)

3. Run EXPLAIN:
mysql> EXPLAIN SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1;
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+------------------------------------+
| id | select_type | table | partitions | type  | possible_keys | key  | key_len | ref  | rows | filtered | Extra                              |
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+------------------------------------+
|  1 | SIMPLE      | t2    | NULL       | range | b,c           | b    | 4       | NULL |   48 |    49.81 | Using index condition; Using where |
+----+-------------+-------+------------+-------+---------------+------+---------+------+------+----------+------------------------------------+
1 row in set, 1 warning (0.00 sec)

4. Repeat step 2-3 until optimizer chooses c

mysql> SELECT @b:=TIMESTAMPADD(SECOND,1,@b);
+-------------------------------+
| @b:=TIMESTAMPADD(SECOND,1,@b) |
+-------------------------------+
| 2017-10-08 04:42:06           |
+-------------------------------+
1 row in set (0.00 sec)

mysql> EXPLAIN SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1;
+----+-------------+-------+------------+------+---------------+------+---------+-------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref   | rows | filtered | Extra       |
+----+-------------+-------+------------+------+---------------+------+---------+-------+------+----------+-------------+
|  1 | SIMPLE      | t2    | NULL       | ref  | b,c           | c    | 5       | const | 8192 |    12.36 | Using where |
+----+-------------+-------+------------+------+---------------+------+---------+-------+------+----------+-------------+
1 row in set, 1 warning (0.00 sec)

5. Enable profiling and run query as-is and index hints on b and c:
mysql> SET PROFILING=1;
Query OK, 0 rows affected, 1 warning (0.00 sec)

mysql> SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.02 sec)

mysql> SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.02 sec)

mysql> SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.02 sec)

mysql> SELECT count(*) FROM t2 USE INDEX(b) WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.01 sec)

mysql> SELECT count(*) FROM t2 USE INDEX(b) WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.01 sec)

mysql> SELECT count(*) FROM t2 USE INDEX(b) WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.01 sec)

mysql> SELECT count(*) FROM t2 USE INDEX(c) WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.02 sec)

mysql> SELECT count(*) FROM t2 USE INDEX(c) WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.02 sec)

mysql> SELECT count(*) FROM t2 USE INDEX(c) WHERE b BETWEEN @a AND @b and c=1;
+----------+
| count(*) |
+----------+
|     1016 |
+----------+
1 row in set (0.02 sec)

6. Run SHOW PROFILES:
mysql> SHOW PROFILES;
+----------+------------+------------------------------------------------------------------------+
| Query_ID | Duration   | Query                                                                  |
+----------+------------+------------------------------------------------------------------------+
|        1 | 0.02211975 | SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1              |
|        2 | 0.02302550 | SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1              |
|        3 | 0.02425825 | SELECT count(*) FROM t2 WHERE b BETWEEN @a AND @b and c=1              |
|        4 | 0.01171400 | SELECT count(*) FROM t2 USE INDEX(b) WHERE b BETWEEN @a AND @b and c=1 |
|        5 | 0.01144150 | SELECT count(*) FROM t2 USE INDEX(b) WHERE b BETWEEN @a AND @b and c=1 |
|        6 | 0.01317850 | SELECT count(*) FROM t2 USE INDEX(b) WHERE b BETWEEN @a AND @b and c=1 |
|        7 | 0.02305325 | SELECT count(*) FROM t2 USE INDEX(c) WHERE b BETWEEN @a AND @b and c=1 |
|        8 | 0.02235650 | SELECT count(*) FROM t2 USE INDEX(c) WHERE b BETWEEN @a AND @b and c=1 |
|        9 | 0.02413975 | SELECT count(*) FROM t2 USE INDEX(c) WHERE b BETWEEN @a AND @b and c=1 |
+----------+------------+------------------------------------------------------------------------+
9 rows in set, 1 warning (0.00 sec)

New test data for t2

Attachment: t2-1.sql (application/octet-stream, text), 1.06 KiB.

Thank you, Jaime Sicam.
Observed as described.

Thanks,
Umesh

test results - 5.7.19

Attachment: 87947_5.7.19.results (application/octet-stream, text), 11.08 KiB.

Posted by developer:
 
The problem exposed here is that range access and ref access are not comparable costs.  This can be illustrated by looking at the estimated cost for using range acccess and ref access for index c.  From optimizer trace:

                  "analyzing_range_alternatives": {
                    "range_scan_alternatives": [
                      {
                        "index": "b",
                        "ranges": [
                          "0x59e5b6f3 <= b <= 0x59e5b6f9"
                        ],
                        "index_dives_for_eq_ranges": true,
                        "rowid_ordered": false,
                        "using_mrr": false,
                        "index_only": false,
                        "rows": 2032,
                        "cost": 2439.4,
                        "chosen": true
                      },
                      {
                        "index": "c",
                        "ranges": [
                          "1 <= c <= 1"
                        ],
                        "index_dives_for_eq_ranges": true,
                        "rowid_ordered": true,
                        "using_mrr": false,
                        "index_only": false,
                        "rows": 8192,
                        "cost": 9831.4,
                        "chosen": false,
                        "cause": "cost"
                      }
                    ],
...
                "table": "`t2`",
                "best_access_path": {
                  "considered_access_paths": [
                    {
                      "access_type": "ref",
                      "index": "c",
                      "rows": 8192,
                      "cost": 1929.4,
                      "chosen": true
                    },
                    {
                      "rows_to_scan": 2032,
                      "access_type": "range",
                      "range_details": {
                        "used_index": "b"
                      },
                      "resulting_rows": 1012.2,
                      "cost": 2845.8,
                      "chosen": false
                    }
                  ]
                },

Notice how the cost for range access on c index is 9831.4 while for ref access it is 1929.4.  The overhead associated with range access is that one needs to evaluate the condition on the column for every row.  This overhead is definitely not the most significant part of the total cost of range access.  Hence, cost model need to change for this bug to be fixed.

Similar behavior on previous versions:

master [localhost] {msandbox} ((none)) > select @@version;
+------------+
| @@version  |
+------------+
| 5.6.36-log |
+------------+
1 row in set (0.00 sec)

master [localhost] {msandbox} ((none)) >   explain format=json SELECT a.profile_id,a.ad_id_in_target,a.affcode  FROM test.performance_by_affcode a USE KEY(profile_id_date) WHERE profile_id = 53  AND  DATE >=  '2018-02-28' AND DATE < NOW();
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| EXPLAIN                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| {
  "query_block": {
    "select_id": 1,
    "table": {
      "table_name": "a",
      "access_type": "ref",
      "possible_keys": [
        "profile_id_date"
      ],
      "key": "profile_id_date",
      "used_key_parts": [
        "profile_id"
      ],
      "key_length": "4",
      "ref": [
        "const"
      ],
      "rows": 19818,
      "filtered": 100,
      "index_condition": "((`test`.`a`.`date` >= '2018-02-28') and (`test`.`a`.`date` < now()))"
    }
  }
} |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set, 1 warning (0.00 sec)

master [localhost] {msandbox} ((none)) >   explain format=json SELECT a.profile_id,a.ad_id_in_target,a.affcode  FROM test.performance_by_affcode a WHERE profile_id = 53  AND  DATE >=  '2018-02-28' AND DATE < NOW();
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| EXPLAIN                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| {
  "query_block": {
    "select_id": 1,
    "table": {
      "table_name": "a",
      "access_type": "ref",
      "possible_keys": [
        "affcode_per_day",
        "profile_ad_id_date",
        "profile_affcode",
        "profile_id_date"
      ],
      "key": "affcode_per_day",
      "used_key_parts": [
        "profile_id"
      ],
      "key_length": "4",
      "ref": [
        "const"
      ],
      "rows": 23410,
      "filtered": 100,
      "using_index": true,
      "attached_condition": "((`test`.`a`.`date` >= '2018-02-28') and (`test`.`a`.`date` < <cache>(now())))"
    }
  }
} |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set, 1 warning (0.00 sec)