commit 232e77fe4a925fc82714027336a91ba22b815b5a Author: Zsolt Parragi Date: Wed Mar 28 13:12:01 2018 +0200 Fix bug 90229 / PS-3928 (Fix for complex case insensitive full text queries) These queries tried to validate the result by matching it back to the original result, converted to lower case form. As queries using binary collations weren't converted to lowercase, these validations failed when the query contained any upper case characters. This caused ngram queries which contained upper case characters and were longer than the ngram length to return no results. (cherry picked from commit e84126949e62fd6fa922d23ba6870a39ac2c1c61) diff --git a/mysql-test/suite/innodb_fts/r/bug78048.result b/mysql-test/suite/innodb_fts/r/bug78048.result new file mode 100644 index 00000000000..7897cc313ec --- /dev/null +++ b/mysql-test/suite/innodb_fts/r/bug78048.result @@ -0,0 +1,48 @@ +create table `ngram_simple` ( +`i` int(11) not null auto_increment, +`txt` text collate utf8mb4_bin not null, +primary key (`i`), +fulltext key `fx_txts` (`txt`) ) engine=InnoDB auto_increment=10 default charset=utf8mb4 collate=utf8mb4_bin; +insert into ngram_simple (txt) values ('CompP&C01'); +insert into ngram_simple (txt) values ('CompP&C02'); +insert into ngram_simple (txt) values ('CompP&C03'); +insert into ngram_simple (txt) values ('CompP&C04'); +insert into ngram_simple (txt) values ('CompP&C05'); +insert into ngram_simple (txt) values ('CompP&C06'); +insert into ngram_simple (txt) values ('CompP&c04'); +insert into ngram_simple (txt) values ('abc*efg'); +insert into ngram_simple (txt) values ('abc&efg'); +insert into ngram_simple (txt) values ('abC&efGh'); +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +i txt +17 abc*efg +18 abc&efg +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +i txt +19 abC&efGh +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +i txt +13 CompP&C04 +select * from ngram_simple where 
match(txt) against ('c04' in boolean mode); +i txt +16 CompP&c04 +alter table ngram_simple drop key fx_txts; +alter table ngram_simple add fulltext key `fx_txts` (`txt`) with parser ngram; +optimize table ngram_simple; +Table Op Msg_type Msg_text +test.ngram_simple optimize note Table does not support optimize, doing recreate + analyze instead +test.ngram_simple optimize status OK +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +i txt +17 abc*efg +18 abc&efg +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +i txt +19 abC&efGh +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +i txt +13 CompP&C04 +select * from ngram_simple where match(txt) against ('c04' in boolean mode); +i txt +16 CompP&c04 +drop table ngram_simple; diff --git a/mysql-test/suite/innodb_fts/t/bug78048.test b/mysql-test/suite/innodb_fts/t/bug78048.test new file mode 100644 index 00000000000..ba3ea9e422b --- /dev/null +++ b/mysql-test/suite/innodb_fts/t/bug78048.test @@ -0,0 +1,38 @@ +# Bug #78048: Complex case insensitive full text queries returned no results +# +# This was especially noticeable with ngram indices, where a query longer than +# the ngram length was interpreted as several concatenated queries + +create table `ngram_simple` ( +`i` int(11) not null auto_increment, +`txt` text collate utf8mb4_bin not null, +primary key (`i`), +fulltext key `fx_txts` (`txt`) ) engine=InnoDB auto_increment=10 default charset=utf8mb4 collate=utf8mb4_bin; + +insert into ngram_simple (txt) values ('CompP&C01'); +insert into ngram_simple (txt) values ('CompP&C02'); +insert into ngram_simple (txt) values ('CompP&C03'); +insert into ngram_simple (txt) values ('CompP&C04'); +insert into ngram_simple (txt) values ('CompP&C05'); +insert into ngram_simple (txt) values ('CompP&C06'); +insert into ngram_simple (txt) values ('CompP&c04'); +insert into ngram_simple (txt) values ('abc*efg'); +insert into ngram_simple (txt) values 
('abc&efg'); +insert into ngram_simple (txt) values ('abC&efGh'); + +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +select * from ngram_simple where match(txt) against ('c04' in boolean mode); + +alter table ngram_simple drop key fx_txts; +alter table ngram_simple add fulltext key `fx_txts` (`txt`) with parser ngram; +optimize table ngram_simple; + +select * from ngram_simple where match(txt) against ('abc' in boolean mode); +select * from ngram_simple where match(txt) against ('abC' in boolean mode); +select * from ngram_simple where match(txt) against ('C04' in boolean mode); +select * from ngram_simple where match(txt) against ('c04' in boolean mode); + +drop table ngram_simple; + diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 504aad9f42c..1f6e435300a 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6661,7 +6661,9 @@ int innobase_fts_text_case_cmp(const void *cs, /*!< in: Character set */ const fts_string_t *s2 = (const fts_string_t *)p2; ulint newlen; - my_casedn_str(charset, (char *)s2->f_str); + if (!my_binary_compare(charset)) { + my_casedn_str(charset, (char *)s2->f_str); + } newlen = strlen((const char *)s2->f_str);