Bug #83776 | InnoDB FULLTEXT search returns incorrect result for operators on ignored words | ||
---|---|---|---|
Submitted: | 10 Nov 2016 20:42 | Modified: | 11 Nov 2016 18:45 |
Reporter: | Dillon Sadofsky | Email Updates: | |
Status: | Verified | Impact on me: | |
Category: | MySQL Server: FULLTEXT search | Severity: | S3 (Non-critical) |
Version: | 5.6/5.7/8.0 | OS: | Any |
Assigned to: | CPU Architecture: | Any | |
Tags: | fulltext, innodb, operator |
[10 Nov 2016 20:42]
Dillon Sadofsky
[10 Nov 2016 20:43]
Dillon Sadofsky
More specific version #'s added.
[11 Nov 2016 2:24]
MySQL Verification Team
c:\dbs>c:\dbs\5.7\bin\mysql -uroot --port=3570 -p --prompt="mysql 5.7 > " Enter password: ****** Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 4 Server version: 5.7.18-debug Source distribution PULL: 2016-NOV-05 Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective owners. Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. mysql 5.7 > create database k; Query OK, 1 row affected (0.06 sec) mysql 5.7 > use k Database changed mysql 5.7 > Create table `test`( -> `value` varchar(100) NOT NULL, -> FULLTEXT index `search` (`value`) -> ) ENGINE = InnoDB; Query OK, 0 rows affected (2.78 sec) mysql 5.7 > mysql 5.7 > TRUNCATE test; Query OK, 0 rows affected (2.72 sec) mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM'); Query OK, 1 row affected (0.11 sec) mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM S'); Query OK, 1 row affected (0.03 sec) mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD S'); Query OK, 1 row affected (0.06 sec) mysql 5.7 > INSERT INTO test (`value`) VALUES ('LONGWORD WAS MEDIUM'); Query OK, 1 row affected (0.01 sec) mysql 5.7 > # Works as expected mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.05 sec) mysql 5.7 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE) -> ; Empty set (0.00 sec) mysql 5.7 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE) -> ; Empty set (0.00 sec) mysql 5.7 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.7 > alter table test engine MyISAM; Query OK, 4 rows affected (0.91 sec) Records: 4 Duplicates: 0 Warnings: 0 mysql 5.7 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.7 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.7 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 5.7 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.7 >
[11 Nov 2016 18:45]
MySQL Verification Team
Thank you for the bug report. miguel@ural:~/dbs> ./56c Enter password: Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 1 Server version: 5.6.36 Source distribution PULL: 2016-NOV-11 Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective owners. Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. mysql 5.6 > CREATE DATABASE ah; Query OK, 1 row affected (0.00 sec) mysql 5.6 > USE ah Database changed mysql 5.6 > mysql 5.6 > Create table `test`( -> `value` varchar(100) NOT NULL, -> FULLTEXT index `search` (`value`) -> ) ENGINE = InnoDB; Query OK, 0 rows affected (2.94 sec) mysql 5.6 > mysql 5.6 > TRUNCATE test; Query OK, 0 rows affected (2.49 sec) mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM'); Query OK, 1 row affected (0.05 sec) mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM S'); Query OK, 1 row affected (0.05 sec) mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD S'); Query OK, 1 row affected (0.16 sec) mysql 5.6 > INSERT INTO test (`value`) VALUES ('LONGWORD WAS MEDIUM'); Query OK, 1 row affected (0.06 sec) mysql 5.6 > mysql 5.6 > # Works as expected mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.6 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE); Empty set (0.00 sec) mysql 5.6 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE); Empty set (0.00 sec) mysql 5.6 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.6 > ALTER TABLE test ENGINE MyISAM; Query OK, 4 rows affected (0.38 sec) Records: 4 Duplicates: 0 Warnings: 0 mysql 5.6 > # Works as expected mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.01 sec) mysql 5.6 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.6 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.6 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 5.6 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 5.6 >
[11 Nov 2016 18:50]
MySQL Verification Team
miguel@tikal:~/dbs $ ./80c Enter password: Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 4 Server version: 8.0.1-dmr Source distribution PULL: 2016-NOV-05 Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective owners. Type 'help;' or '\h' for help. Type '\c' to clear the current input statement. mysql 8.0 > CREATE DATABASE ah; Query OK, 1 row affected (0.11 sec) mysql 8.0 > USE ah Database changed mysql 8.0 > Create table `test`( -> `value` varchar(100) NOT NULL, -> FULLTEXT index `search` (`value`) -> ) ENGINE = InnoDB; Query OK, 0 rows affected (2.15 sec) mysql 8.0 > mysql 8.0 > TRUNCATE test; Query OK, 0 rows affected (4.31 sec) mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM'); Query OK, 1 row affected (0.03 sec) mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD MEDIUM S'); Query OK, 1 row affected (0.06 sec) mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD S'); Query OK, 1 row affected (0.15 sec) mysql 8.0 > INSERT INTO test (`value`) VALUES ('LONGWORD WAS MEDIUM'); Query OK, 1 row affected (0.04 sec) mysql 8.0 > mysql 8.0 > # Works as expected mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE); Empty set (0.00 sec) mysql 8.0 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE); Empty set (0.00 sec) mysql 8.0 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > ALTER TABLE test ENGINE MyISAM; Query OK, 4 rows affected (0.77 sec) Records: 4 Duplicates: 0 Warnings: 0 mysql 8.0 > # Works as expected mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # Works as expected mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # If S (as a short word) is ignored, you'd expect this to be the same as the above, but it will find nothing mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +S' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # Similarly, since AND is a stopword, even if the server is set up to search 3 letter words, this also finds nothing mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD +WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 > # No operator allows the stopword to be truly optional, resulting in what we expect: mysql 8.0 > SELECT * FROM test WHERE MATCH(`value`) AGAINST ('+LONGWORD WAS' IN BOOLEAN MODE); +---------------------+ | value | +---------------------+ | LONGWORD MEDIUM | | LONGWORD MEDIUM S | | LONGWORD S | | LONGWORD WAS MEDIUM | +---------------------+ 4 rows in set (0.00 sec) mysql 8.0 >