| Bug #83776 | InnoDB FULLTEXT search returns incorrect result for operators on ignored words | ||
|---|---|---|---|
| Submitted: | 10 Nov 2016 20:42 | Modified: | 11 Nov 2016 18:45 |
| Reporter: | Dillon Sadofsky | Email Updates: | |
| Status: | Verified | Impact on me: | |
| Category: | MySQL Server: FULLTEXT search | Severity: | S3 (Non-critical) |
| Version: | 5.6/5.7/8.0 | OS: | Any |
| Assigned to: | CPU Architecture: | Any | |
| Tags: | fulltext, innodb, operator | ||
[10 Nov 2016 20:42]
Dillon Sadofsky
[10 Nov 2016 20:43]
Dillon Sadofsky
More specific version #'s added.
[11 Nov 2016 2:24]
MySQL Verification Team
c:\dbs>c:\dbs\5.7\bin\mysql -uroot --port=3570 -p --prompt="mysql 5.7 > "
Enter password: ******
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 5.7.18-debug Source distribution PULL: 2016-NOV-05
Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql 5.7 > create database k;
Query OK, 1 row affected (0.06 sec)
mysql 5.7 > use k
Database changed
mysql 5.7 > Create table `test`(
-> `value` varchar(100) NOT NULL,
-> FULLTEXT index `search` (`value`)
-> ) ENGINE = InnoDB;
Query OK, 0 rows affected (2.78 sec)
mysql 5.7 >
mysql 5.7 > TRUNCATE test;
Query OK, 0 rows affected (2.72 sec)
mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM');
Query OK, 1 row affected (0.11 sec)
mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM S');
Query OK, 1 row affected (0.03 sec)
mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD S');
Query OK, 1 row affected (0.06 sec)
mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD WAS MEDIUM');
Query OK, 1 row affected (0.01 sec)
mysql 5.7 > # Works as expected
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.05 sec)
mysql 5.7 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE)
-> ;
Empty set (0.00 sec)
mysql 5.7 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE)
-> ;
Empty set (0.00 sec)
mysql 5.7 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.7 > alter table test engine MyISAM;
Query OK, 4 rows affected (0.91 sec)
Records: 4 Duplicates: 0 Warnings: 0
mysql 5.7 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.7 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.7 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.7 >
[11 Nov 2016 18:45]
MySQL Verification Team
Thank you for the bug report.
miguel@ural:~/dbs> ./56c
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 5.6.36 Source distribution PULL: 2016-NOV-11
Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql 5.6 > CREATE DATABASE ah;
Query OK, 1 row affected (0.00 sec)
mysql 5.6 > USE ah
Database changed
mysql 5.6 >
mysql 5.6 > Create table `test`(
-> `value` varchar(100) NOT NULL,
-> FULLTEXT index `search` (`value`)
-> ) ENGINE = InnoDB;
Query OK, 0 rows affected (2.94 sec)
mysql 5.6 >
mysql 5.6 > TRUNCATE test;
Query OK, 0 rows affected (2.49 sec)
mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM');
Query OK, 1 row affected (0.05 sec)
mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM S');
Query OK, 1 row affected (0.05 sec)
mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD S');
Query OK, 1 row affected (0.16 sec)
mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD WAS MEDIUM');
Query OK, 1 row affected (0.06 sec)
mysql 5.6 >
mysql 5.6 > # Works as expected
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.6 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE);
Empty set (0.00 sec)
mysql 5.6 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE);
Empty set (0.00 sec)
mysql 5.6 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.6 > ALTER TABLE test ENGINE MyISAM;
Query OK, 4 rows affected (0.38 sec)
Records: 4 Duplicates: 0 Warnings: 0
mysql 5.6 > # Works as expected
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.01 sec)
mysql 5.6 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.6 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.6 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 5.6 >
[11 Nov 2016 18:50]
MySQL Verification Team
miguel@tikal:~/dbs $ ./80c
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 4
Server version: 8.0.1-dmr Source distribution PULL: 2016-NOV-05
Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql 8.0 > CREATE DATABASE ah;
Query OK, 1 row affected (0.11 sec)
mysql 8.0 > USE ah
Database changed
mysql 8.0 > Create table `test`(
-> `value` varchar(100) NOT NULL,
-> FULLTEXT index `search` (`value`)
-> ) ENGINE = InnoDB;
Query OK, 0 rows affected (2.15 sec)
mysql 8.0 >
mysql 8.0 > TRUNCATE test;
Query OK, 0 rows affected (4.31 sec)
mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM');
Query OK, 1 row affected (0.03 sec)
mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM S');
Query OK, 1 row affected (0.06 sec)
mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD S');
Query OK, 1 row affected (0.15 sec)
mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD WAS MEDIUM');
Query OK, 1 row affected (0.04 sec)
mysql 8.0 >
mysql 8.0 > # Works as expected
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE);
Empty set (0.00 sec)
mysql 8.0 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE);
Empty set (0.00 sec)
mysql 8.0 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > ALTER TABLE test ENGINE MyISAM;
Query OK, 4 rows affected (0.77 sec)
Records: 4 Duplicates: 0 Warnings: 0
mysql 8.0 > # Works as expected
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # Works as expected
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 > # No operator allows the stopword to be truly optional, resulting in what we expect:
mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE);
+---------------------+
| value |
+---------------------+
| LONGWORD MEDIUM |
| LONGWORD MEDIUM S |
| LONGWORD S |
| LONGWORD WAS MEDIUM |
+---------------------+
4 rows in set (0.00 sec)
mysql 8.0 >
