From e84126949e62fd6fa922d23ba6870a39ac2c1c61 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Wed, 28 Mar 2018 13:12:01 +0200 Subject: [PATCH] PS-3928: Fix for complex case insensitive full text queries These queries tried to validate the result by matching it back to the original result, converted to lower case form. As queries using binary collations weren't converted to lowercase, these validations failed when the query contained any upper case characters. This caused ngram queries which contained upper case characters and were longer than the ngram length to return no results. --- mysql-test/suite/innodb_fts/r/bug78048.result | 48 +++++++++++++++++++++++++++ mysql-test/suite/innodb_fts/t/bug78048.test | 40 ++++++++++++++++++++++ storage/innobase/handler/ha_innodb.cc | 4 ++- 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/innodb_fts/r/bug78048.result create mode 100644 mysql-test/suite/innodb_fts/t/bug78048.test diff --git a/mysql-test/suite/innodb_fts/r/bug78048.result b/mysql-test/suite/innodb_fts/r/bug78048.result new file mode 100644 index 00000000000..7897cc313ec --- /dev/null +++ b/mysql-test/suite/innodb_fts/r/bug78048.result @@ -0,0 +1,48 @@ +create table `ngram_simple` ( +`i` int(11) not null auto_increment, +`txt` text collate utf8mb4_bin not null, +primary key (`i`), +fulltext key `fx_txts` (`txt`) ) engine=InnoDB auto_increment=10 default charset=utf8mb4 collate=utf8mb4_bin; +insert into ngram_simple (txt) values ('CompP&C01'); +insert into ngram_simple (txt) values ('CompP&C02'); +insert into ngram_simple (txt) values ('CompP&C03'); +insert into ngram_simple (txt) values ('CompP&C04'); +insert into ngram_simple (txt) values ('CompP&C05'); +insert into ngram_simple (txt) values ('CompP&C06'); +insert into ngram_simple (txt) values ('CompP&c04'); +insert into ngram_simple (txt) values ('abc*efg'); +insert into ngram_simple (txt) values ('abc&efg'); +insert into ngram_simple (txt) values ('abC&efGh'); +select * 
from ngram_simple where match(txt) against ('abc' in boolean mode); +i txt +17 abc*efg +18 abc&efg +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +i txt +19 abC&efGh +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +i txt +13 CompP&C04 +select * from ngram_simple where match(txt) against ('c04' in boolean mode); +i txt +16 CompP&c04 +alter table ngram_simple drop key fx_txts; +alter table ngram_simple add fulltext key `fx_txts` (`txt`) with parser ngram; +optimize table ngram_simple; +Table Op Msg_type Msg_text +test.ngram_simple optimize note Table does not support optimize, doing recreate + analyze instead +test.ngram_simple optimize status OK +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +i txt +17 abc*efg +18 abc&efg +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +i txt +19 abC&efGh +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +i txt +13 CompP&C04 +select * from ngram_simple where match(txt) against ('c04' in boolean mode); +i txt +16 CompP&c04 +drop table ngram_simple; diff --git a/mysql-test/suite/innodb_fts/t/bug78048.test b/mysql-test/suite/innodb_fts/t/bug78048.test new file mode 100644 index 00000000000..2245b48edda --- /dev/null +++ b/mysql-test/suite/innodb_fts/t/bug78048.test @@ -0,0 +1,40 @@ +# Bug #78048: Complex case insensitive full text queries returned no results +# +# This was especially noticeable with ngram indices, where a query longer than +# the ngram length was interpreted as several concatenated queries + +--source include/have_innodb.inc + +create table `ngram_simple` ( +`i` int(11) not null auto_increment, +`txt` text collate utf8mb4_bin not null, +primary key (`i`), +fulltext key `fx_txts` (`txt`) ) engine=InnoDB auto_increment=10 default charset=utf8mb4 collate=utf8mb4_bin; + +insert into ngram_simple (txt) values ('CompP&C01'); +insert into ngram_simple (txt) values ('CompP&C02'); 
+insert into ngram_simple (txt) values ('CompP&C03'); +insert into ngram_simple (txt) values ('CompP&C04'); +insert into ngram_simple (txt) values ('CompP&C05'); +insert into ngram_simple (txt) values ('CompP&C06'); +insert into ngram_simple (txt) values ('CompP&c04'); +insert into ngram_simple (txt) values ('abc*efg'); +insert into ngram_simple (txt) values ('abc&efg'); +insert into ngram_simple (txt) values ('abC&efGh'); + +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +select * from ngram_simple where match(txt) against ('c04' in boolean mode); + +alter table ngram_simple drop key fx_txts; +alter table ngram_simple add fulltext key `fx_txts` (`txt`) with parser ngram; +optimize table ngram_simple; + +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +select * from ngram_simple where match(txt) against ('c04' in boolean mode); + +drop table ngram_simple; + diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 2e9b64f2a6f..9cd63cb4387 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -7173,7 +7173,9 @@ innobase_fts_text_case_cmp( const fts_string_t* s2 = (const fts_string_t*) p2; ulint newlen; - my_casedn_str(charset, (char*) s2->f_str); + if (!my_binary_compare(charset)) { + my_casedn_str(charset, (char*) s2->f_str); + } newlen = strlen((const char*) s2->f_str);