Bug #111479 Optimizer estimated rows is always equal to 1 when using spatial index.
Submitted: 19 Jun 2023 4:24 Modified: 19 Jun 2023 11:42
Reporter: Aristotle Po Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Optimizer Severity:S3 (Non-critical)
Version:5.7.42 OS:Any
Assigned to: CPU Architecture:Any
Tags: spatial index

[19 Jun 2023 4:24] Aristotle Po
Description:
############################################
# Optimizer estimated rows is always equal to 1 when using spatial index when IP start equals to IP end.
############################################

mysql [localhost:5742] {msandbox} (db6) > SET @new_ip_start = INET_ATON('1.1.1.1');
mysql [localhost:5742] {msandbox} (db6) > SET @new_ip_end   = INET_ATON('1.1.1.1');
mysql [localhost:5742] {msandbox} (db6) > EXPLAIN
    -> SELECT COUNT(*) FROM ip_ranges
    ->   WHERE MBRIntersects(ip_range,
    ->     Polygon(
    ->       LineString(
    ->         Point(@new_ip_start, 1),
    ->         Point(@new_ip_start, 0),
    ->         Point(@new_ip_end, 0),
    ->         Point(@new_ip_end, 1),
    ->         Point(@new_ip_start, 1)
    ->       )
    ->     )
    ->   )\G
*************************** 1. row ***************************
           id: 1
  select_type: SIMPLE
        table: ip_ranges
   partitions: NULL
         type: range
possible_keys: index_ip_ranges_on_ip_range
          key: index_ip_ranges_on_ip_range
      key_len: 34
          ref: NULL
         rows: 1
     filtered: 100.00
        Extra: Using where
1 row in set, 1 warning (0.00 sec)

############################################
# Actual count :
############################################
| COUNT(*) |
+----------+
|    32768 |

How to repeat:
############################################
# Prepare the schema 
############################################

DROP  DATABASE db6;
CREATE DATABASE db6;
USE db6;
CREATE TABLE ip_ranges(
        i INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY
        , ip_start int unsigned not null
        , ip_end int unsigned not null
        , KEY idx_start_end(ip_start,ip_end)
);

SET @new_ip_start = INET_ATON('1.1.1.1');
SET @new_ip_end   = INET_ATON('1.1.3.3');
SET @new2_ip_start= INET_ATON('2.1.1.1');
SET @new2_ip_end  = INET_ATON('2.1.3.3');

INSERT INTO  ip_ranges(ip_start, ip_end) VALUES (@new_ip_start , @new_ip_end) ;
INSERT INTO  ip_ranges(ip_start, ip_end) VALUES (@new2_ip_start, @new2_ip_end) ;

-- Below schema changes is based from https://bignerdranch.com/blog/using-mysql-spatial-extensions-for-range-queries/

ALTER TABLE ip_ranges add ip_range POLYGON NULL;
UPDATE ip_ranges
  SET ip_range=Polygon(
    LineString(
      Point(ip_start, 1),
      Point(ip_start, 0),
      Point(ip_end, 0),
      Point(ip_end, 1),
      Point(ip_start, 1)
    )
  );
ALTER TABLE ip_ranges MODIFY ip_range POLYGON NOT NULL;
CREATE SPATIAL INDEX index_ip_ranges_on_ip_range on ip_ranges (ip_range);

############################################
# Via linux terminal : Load data
############################################

############################################
# Insert data with same value as "i=1";
############################################
for i in {1..15};
do
        mysql -h127.0.0.1 -P5742 db6 -e"INSERT INTO ip_ranges(ip_start, ip_end, ip_range) SELECT ip_start, ip_end, ip_range FROM ip_ranges v1 WHERE EXISTS ( SELECT 1 FROM ip_ranges v2 WHERE v1.ip_start=v2.ip_start AND v1.ip_end=v2.ip_end AND v2.i=1)"
done
############################################
# Insert data with same value as "i=2";
############################################
for i in {1..18};
do
        mysql -h127.0.0.1 -P5742 db6 -e"INSERT INTO ip_ranges(ip_start, ip_end, ip_range) SELECT ip_start, ip_end, ip_range FROM ip_ranges v1 WHERE EXISTS ( SELECT 1 FROM ip_ranges v2 WHERE v1.ip_start=v2.ip_start AND v1.ip_end=v2.ip_end AND v2.i=1)"
done

#####################################################
# We now have data :
#####################################################
mysql [localhost:5742] {msandbox} (db6) > SELECT COUNT(*), INET_NTOA(ip_start) ip_s, INET_NTOA(ip_end) ip_e,  ip_range FROM ip_ranges GROUP BY 2,3,4;
+----------+---------+---------+---------------------------------------------------------------------------------------------------+
| COUNT(*) | ip_s    | ip_e    | ip_range                                                                                          |
+----------+---------+---------+---------------------------------------------------------------------------------------------------+
|    32768 | 1.1.1.1 | 1.1.3.3 |                 pA      �?   pA           00pA           00pA      �?   pA      �?    |
|   262144 | 2.1.1.1 | 2.1.3.3 |              �A      �?�A          �A          �A      �?�A      �?         |
+----------+---------+---------+---------------------------------------------------------------------------------------------------+
2 rows in set (0.79 sec)

############################################
# test-1.sh : we will use this bash script in testing 
############################################
mysql -h127.0.0.1 -P5742 db6 -t << EOF
SET @new_ip_start = INET_ATON('1.1.1.1');
SET @new_ip_end   = INET_ATON('$1');
$2
SELECT COUNT(*) FROM ip_ranges
  WHERE MBRIntersects(ip_range,
    Polygon(
      LineString(
        Point(@new_ip_start, 1),
        Point(@new_ip_start, 0),
        Point(@new_ip_end, 0),
        Point(@new_ip_end, 1),
        Point(@new_ip_start, 1)
      )
    )
  );
EOF

############################################
# Test 1 : OK : The optimizer was able to give a good estimated rows
############################################
shell> bash test-1.sh 1.1.3.3 EXPLAIN
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+-------+----------+-------------+
| id | select_type | table     | partitions | type  | possible_keys               | key                         | key_len | ref  | rows  | filtered | Extra       |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+-------+----------+-------------+
|  1 | SIMPLE      | ip_ranges | NULL       | range | index_ip_ranges_on_ip_range | index_ip_ranges_on_ip_range | 34      | NULL | 36562 |   100.00 | Using where |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+-------+----------+-------------+

############################################
# Test 2 : Make new_ip_end same with new_ip_start.
# Issue : The optimizer was not able to able to to give a good row estimate and is always 1.
############################################
shell> bash test-1.sh 1.1.1.1 EXPLAIN
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
| id | select_type | table     | partitions | type  | possible_keys               | key                         | key_len | ref  | rows | filtered | Extra       |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
|  1 | SIMPLE      | ip_ranges | NULL       | range | index_ip_ranges_on_ip_range | index_ip_ranges_on_ip_range | 34      | NULL |    1 |   100.00 | Using where |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+

############################################
# Test 1 and 2 : Have same count.
############################################
shell> bash test-1.sh 1.1.3.3
+----------+
| COUNT(*) |
+----------+
|    32768 |
+----------+
shell> bash test-1.sh 1.1.1.1
+----------+
| COUNT(*) |
+----------+
|    32768 |
+----------+

#####################################################
# Test 3 : This has similar behavior with test 2 where its estimated rows is always equal to 1
#####################################################

mysql [localhost:5742] {msandbox} (db6) > EXPLAIN SELECT COUNT(*) FROM ip_ranges WHERE MBRINTERSECTS(ip_range, Point(INET_ATON('1.1.1.1'), 0));
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
| id | select_type | table     | partitions | type  | possible_keys               | key                         | key_len | ref  | rows | filtered | Extra       |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
|  1 | SIMPLE      | ip_ranges | NULL       | range | index_ip_ranges_on_ip_range | index_ip_ranges_on_ip_range | 34      | NULL |    1 |   100.00 | Using where |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
1 row in set, 1 warning (0.00 sec)

mysql [localhost:5742] {msandbox} (db6) > EXPLAIN SELECT COUNT(*) FROM ip_ranges WHERE    INTERSECTS(ip_range, Point(INET_ATON('1.1.1.1'), 0));
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
| id | select_type | table     | partitions | type  | possible_keys               | key                         | key_len | ref  | rows | filtered | Extra       |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
|  1 | SIMPLE      | ip_ranges | NULL       | range | index_ip_ranges_on_ip_range | index_ip_ranges_on_ip_range | 34      | NULL |    1 |   100.00 | Using where |
+----+-------------+-----------+------------+-------+-----------------------------+-----------------------------+---------+------+------+----------+-------------+
1 row in set, 2 warnings (0.00 sec)

mysql [localhost:5742] {msandbox} (db6) > SELECT COUNT(*) FROM ip_ranges WHERE MBRINTERSECTS(ip_range, Point(INET_ATON('1.1.1.1'), 0));
+----------+
| COUNT(*) |
+----------+
|    32768 |
+----------+
1 row in set (0.03 sec)

mysql [localhost:5742] {msandbox} (db6) > SELECT COUNT(*) FROM ip_ranges WHERE    INTERSECTS(ip_range, Point(INET_ATON('1.1.1.1'), 0));
+----------+
| COUNT(*) |
+----------+
|    32768 |
+----------+
1 row in set, 1 warning (0.03 sec)

Suggested fix:
Please make the optimizer estimated rows accurate regardless the value of ip_end in the test case.
[19 Jun 2023 11:42] MySQL Verification Team
Hello Aristotle Po,

Thank you for the report and feedback.
Verified as described.

regards,
Umesh