Bug #3358 MySQL Server can not handle column references in queries when db is in UTF8
Submitted: 1 Apr 2004 6:26 Modified: 1 Apr 2004 6:50
Reporter: Hakan Özkara Email Updates:
Status: Closed Impact on me:
None 
Category:MySQL Server Severity:S1 (Critical)
Version:4.1.1 OS:Windows (Windows XP)
Assigned to: CPU Architecture:Any

[1 Apr 2004 6:26] Hakan Özkara
Description:
####################### BUG REPORT ###########################
#                                                            #
# MYSQL Server Version  : 4.1.1a-alpha-NT with default       #
#                         installation                       #
#                                                            #
# Operating System      : Windows XP, Windows 2000, Windows  #
#                         Server 2003                        #
#                                                            # 
# Problem Description   : MySQL Server 4.1.1a can not        #
#                         correctly resolve and match the    #
#                         string type of references in       #
#                         where clauses when character       #
#                         set of database is utf8 and        #
#                         referenced column is also given    #
#                         with a contant constraint in       #
#                         the where clause, although it      #
#                         can when character set is          #
#                         latin1, or  constants are used     #
#                         in the where codition instead      #
#                         of references, or referenced       #
#                         column is integer. But, when we    #
#                         set the character_set_server       #
#                         parameter to utf8 then problem     #
#                         occurs in both databases. Please   #
#                         refer to the CONCLUSION part at    #
#                         the end of this report.            #
#                                                            #
#                                                            #
# How to test           : 1. Install MySQL 4.1.1a on a       #
#                            Windows installed computer.     #
#                         2. Copy this file as c:\bugrep.sql # 
#                         3. Open mysql client console       #
#                         4. run "source c:\bugrep.sql"      #
#                         5. Watch the results               #
#                                                            #
# Authors               : Hakan Ozkara / hozkara@ias.com.tr  #
#                         Industrial Application Software    #
#                         R&D Department                     #
#                                                            #
#                         Ozgür Yavuz / y.ozgur@iascon.de    #
#                         Industrial Application Software    #
#                         System Constultant                 #
#                                                            #
############################################################## 

How to repeat:

##############################################################
#  FIRST TRY: WITH UTF8 DATABASE & LATIN1 SERVER CHARSET     #
#      There are Problems With Where Conditions              #
##############################################################

USE mysql;
DROP DATABASE mydb_utf8;

# We create database with utf8 characterset
CREATE DATABASE mydb_utf8 DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;

USE mydb_utf8;

CREATE TABLE T1 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	A VARCHAR(50) ) ;	

# adding an index does not change anyhing	
CREATE UNIQUE INDEX T1_I1 ON T1 (KEY1,KEY2,KEY3);
	
INSERT INTO T1 VALUES ( '00','01',1,'header 1' );	
INSERT INTO T1 VALUES ( '00','01',2,'header 2' );
INSERT INTO T1 VALUES ( '00','01',3,'header 3' );
INSERT INTO T1 VALUES ( '00','01',4,'header 4' );

	
CREATE TABLE T2 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	KEY4 INTEGER NOT NULL,	
	B VARCHAR(50) ) ;	
	
# adding an index does not change anyhing	
CREATE UNIQUE INDEX T2_I1 ON T2 (KEY1,KEY2,KEY3,KEY4);	
	
INSERT INTO T2 VALUES ( '00','01',1,1,'item 1-1' );	
INSERT INTO T2 VALUES ( '00','01',1,2,'item 1-2' );
INSERT INTO T2 VALUES ( '00','01',1,3,'item 1-3' );

INSERT INTO T2 VALUES ( '00','01',2,1,'item 2-1' );	
INSERT INTO T2 VALUES ( '00','01',2,2,'item 2-2' );
INSERT INTO T2 VALUES ( '00','01',2,3,'item 2-3' );

INSERT INTO T2 VALUES ( '00','01',3,1,'item 3-1' );	
INSERT INTO T2 VALUES ( '00','01',3,2,'item 3-2' );
INSERT INTO T2 VALUES ( '00','01',3,3,'item 3-3' );

# ***************** PROBLEM  IS HERE **********************
# this select returns 0 rows although there should be 3 rows 

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  this select returns correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = '00' AND
	T2.KEY2 = '01' AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  but when we remove conditions with constants, then it returns correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	# remove this line  T1.KEY1 = '00' AND
	# remove this line  T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

# but, if we use binary flag then it returns correct resultset with 3 rows

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	binary T2.KEY1 = binary T1.KEY1 AND
	binary T2.KEY2 = binary T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

##############################################################
#  SECOND TRY: WITH LATIN1 DATABASE & LATIN1 SERVER CHARSET  #
#   There are NO Problems with Where Conditions              #
##############################################################

USE mysql;
DROP DATABASE mydb_latin1;

# Then we create the same database with default latin1 characterset
CREATE DATABASE mydb_latin1;
USE mydb_latin1;

CREATE TABLE T1 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	A VARCHAR(50) ) ;	
# adding an index does not change anyhing	
CREATE UNIQUE INDEX T1_I1 ON T1 (KEY1,KEY2,KEY3);	
	
INSERT INTO T1 VALUES ( '00','01',1,'header 1' );	
INSERT INTO T1 VALUES ( '00','01',2,'header 2' );
INSERT INTO T1 VALUES ( '00','01',3,'header 3' );
INSERT INTO T1 VALUES ( '00','01',4,'header 4' );

	
CREATE TABLE T2 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	KEY4 INTEGER NOT NULL,	
	B VARCHAR(50) ) ;	
# adding an index does not change anyhing	
CREATE UNIQUE INDEX T2_I1 ON T2 (KEY1,KEY2,KEY3,KEY4);
	
INSERT INTO T2 VALUES ( '00','01',1,1,'item 1-1' );	
INSERT INTO T2 VALUES ( '00','01',1,2,'item 1-2' );
INSERT INTO T2 VALUES ( '00','01',1,3,'item 1-3' );

INSERT INTO T2 VALUES ( '00','01',2,1,'item 2-1' );	
INSERT INTO T2 VALUES ( '00','01',2,2,'item 2-2' );
INSERT INTO T2 VALUES ( '00','01',2,3,'item 2-3' );

INSERT INTO T2 VALUES ( '00','01',3,1,'item 3-1' );	
INSERT INTO T2 VALUES ( '00','01',3,2,'item 3-2' );
INSERT INTO T2 VALUES ( '00','01',3,3,'item 3-3' );

#  now this select returns the correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  this select returns correct resultset   
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = '00' AND
	T2.KEY2 = '01' AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  this select returns correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	# remove this line  T1.KEY1 = '00' AND
	# remove this line  T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

# if we use binary flag again it returns correct resultset with 3 rows

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	binary T2.KEY1 = binary T1.KEY1 AND
	binary T2.KEY2 = binary T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

##############################################################
#   THIRD TRY: WITH UTF8 DATABASE & UTF8 SERVER CHARSET      #
#      There are Problems With Where Conditions              #
##############################################################

# ********** NOW PROBLEM IS WITH LATIN1 DATABASES TOO ********
# Now we set the variable character_set_server to utf8. Then *
# Where condition is processed faulty in both databases.     *
# Adding the "default-character-set=utf8" line into the      *
# my.ini file causes same problem again.                     *
# ************************************************************

SET character_set_server=utf8;

USE mysql;
DROP DATABASE mydb_utf8;

# We create database with utf8 characterset
CREATE DATABASE mydb_utf8 DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;

USE mydb_utf8;

CREATE TABLE T1 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	A VARCHAR(50) ) ;	

# adding an index does not change anyhing	
CREATE UNIQUE INDEX T1_I1 ON T1 (KEY1,KEY2,KEY3);
	
INSERT INTO T1 VALUES ( '00','01',1,'header 1' );	
INSERT INTO T1 VALUES ( '00','01',2,'header 2' );
INSERT INTO T1 VALUES ( '00','01',3,'header 3' );
INSERT INTO T1 VALUES ( '00','01',4,'header 4' );

	
CREATE TABLE T2 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	KEY4 INTEGER NOT NULL,	
	B VARCHAR(50) ) ;	
	
# adding an index does not change anyhing	
CREATE UNIQUE INDEX T2_I1 ON T2 (KEY1,KEY2,KEY3,KEY4);	
	
INSERT INTO T2 VALUES ( '00','01',1,1,'item 1-1' );	
INSERT INTO T2 VALUES ( '00','01',1,2,'item 1-2' );
INSERT INTO T2 VALUES ( '00','01',1,3,'item 1-3' );

INSERT INTO T2 VALUES ( '00','01',2,1,'item 2-1' );	
INSERT INTO T2 VALUES ( '00','01',2,2,'item 2-2' );
INSERT INTO T2 VALUES ( '00','01',2,3,'item 2-3' );

INSERT INTO T2 VALUES ( '00','01',3,1,'item 3-1' );	
INSERT INTO T2 VALUES ( '00','01',3,2,'item 3-2' );
INSERT INTO T2 VALUES ( '00','01',3,3,'item 3-3' );

# ***************** PROBLEM  IS HERE **********************
# this select returns 0 rows although there should be 3 rows 

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  this select returns correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = '00' AND
	T2.KEY2 = '01' AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  but when we remove conditions with constants, then it returns correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	# remove this line  T1.KEY1 = '00' AND
	# remove this line  T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

# if use binary flag then it returns correct resultset with 3 rows

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	binary T2.KEY1 = binary T1.KEY1 AND
	binary T2.KEY2 = binary T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

##############################################################
#   FOURTH TRY: WITH LATIN1 DATABASE & UTF8 SERVER CHARSET   #
#      There are Problems With Where Conditions              #
##############################################################

USE mysql;
DROP DATABASE mydb_latin1;

# Then we create the same database with default latin1 characterset
CREATE DATABASE mydb_latin1;
USE mydb_latin1;

CREATE TABLE T1 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	A VARCHAR(50) ) ;	
# adding an index does not change anyhing	
CREATE UNIQUE INDEX T1_I1 ON T1 (KEY1,KEY2,KEY3);	
	
INSERT INTO T1 VALUES ( '00','01',1,'header 1' );	
INSERT INTO T1 VALUES ( '00','01',2,'header 2' );
INSERT INTO T1 VALUES ( '00','01',3,'header 3' );
INSERT INTO T1 VALUES ( '00','01',4,'header 4' );

	
CREATE TABLE T2 (
	KEY1 VARCHAR(2) NOT NULL,
	KEY2 VARCHAR(2) NOT NULL,
	KEY3 INTEGER NOT NULL,
	KEY4 INTEGER NOT NULL,	
	B VARCHAR(50) ) ;	
# adding an index does not change anyhing	
CREATE UNIQUE INDEX T2_I1 ON T2 (KEY1,KEY2,KEY3,KEY4);
	
INSERT INTO T2 VALUES ( '00','01',1,1,'item 1-1' );	
INSERT INTO T2 VALUES ( '00','01',1,2,'item 1-2' );
INSERT INTO T2 VALUES ( '00','01',1,3,'item 1-3' );

INSERT INTO T2 VALUES ( '00','01',2,1,'item 2-1' );	
INSERT INTO T2 VALUES ( '00','01',2,2,'item 2-2' );
INSERT INTO T2 VALUES ( '00','01',2,3,'item 2-3' );

INSERT INTO T2 VALUES ( '00','01',3,1,'item 3-1' );	
INSERT INTO T2 VALUES ( '00','01',3,2,'item 3-2' );
INSERT INTO T2 VALUES ( '00','01',3,3,'item 3-3' );

# ***************** NOW, PROBLEM OCCURS HERE TOO ***********
# this select returns 0 rows although there should be 3 rows 

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  this select returns correct resultset   
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = '00' AND
	T2.KEY2 = '01' AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

#  this select returns correct resultset with 3 rows
SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	# remove this line  T1.KEY1 = '00' AND
	# remove this line  T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	T2.KEY1 = T1.KEY1 AND
	T2.KEY2 = T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

# if use binary flag again it returns correct resultset with 3 rows

SELECT  
	T1.KEY1, 
	T1.KEY2, 
	T1.KEY3, 
	T1.A, 
	T2.KEY4,
	T2.B 
FROM
	T1,T2
WHERE 
	T1.KEY1 = '00' AND
	T1.KEY2 = '01' AND
	T1.KEY3 = 2 AND
	binary T2.KEY1 = binary T1.KEY1 AND
	binary T2.KEY2 = binary T1.KEY2 AND
	T2.KEY3 = T1.KEY3	
ORDER BY T2.KEY4;

###################### CONCLUSION ############################
#                                                            #
# Following select query returns 0 rows although it should   #
# return 3 rows if :                                         #
#  - Database character Set is utf8 OR                       #
#  - Server Character Set is set to utf8 OR                  #
#  - default-character-set is set to utf8 in my.ini.         #
#                                                            #
#    SELECT                                                  #
#	T1.KEY1,                                             #
#	T1.KEY2,                                             #
#	T1.KEY3,                                             #
#	T1.A,                                                #
#	T2.KEY4,                                             #
#	T2.B                                                 #
#    FROM                                                    #
#	T1,T2                                                #
#    WHERE                                                   #
#	T1.KEY1 = '00' AND                                   #
#	T1.KEY2 = '01' AND                                   #
#	T1.KEY3 = 2 AND                                      #
#	T2.KEY1 = T1.KEY1 AND                                #
#	T2.KEY2 = T1.KEY2 AND                                #
#	T2.KEY3 = T1.KEY3	                             #
#    ORDER BY T2.KEY4;                                       #
#                                                            #
# Please refer to the table below:                           #
#                                                            #
# TryNo  Server CharSet  DB CharSet  Query Successful        #
# -----  --------------  ----------  ----------------        #
#     1  LATIN1          UTF8        NO                      #
#     2  LATIN1          LATIN1      YES                     #
#     3  UTF8            UTF8        NO                      #
#     4  UTF8            LATIN1      YES                     #
#                                                            #
##############################################################

# set character_set_server back to the default installation value
SET character_set_server=latin1;
[1 Apr 2004 6:50] MySQL Verification Team
Testing your queries against a server built from source BK tree (5 days older)
it returns the expected results. Please wait for the 4.1.2 release.

mysql> # ***************** PROBLEM  IS HERE **********************
mysql> # this select returns 0 rows although there should be 3 rows
mysql>
mysql> SELECT
    ->  T1.KEY1,
    ->  T1.KEY2,
    ->  T1.KEY3,
    ->  T1.A,
    ->  T2.KEY4,
    ->  T2.B
    -> FROM
    ->  T1,T2
    -> WHERE
    ->  T1.KEY1 = '00' AND
    ->  T1.KEY2 = '01' AND
    ->  T1.KEY3 = 2 AND
    ->  T2.KEY1 = T1.KEY1 AND
    ->  T2.KEY2 = T1.KEY2 AND
    ->  T2.KEY3 = T1.KEY3
    -> ORDER BY T2.KEY4;
+------+------+------+----------+------+----------+
| KEY1 | KEY2 | KEY3 | A        | KEY4 | B        |
+------+------+------+----------+------+----------+
| 00   | 01   |    2 | header 2 |    1 | item 2-1 |
| 00   | 01   |    2 | header 2 |    2 | item 2-2 |
| 00   | 01   |    2 | header 2 |    3 | item 2-3 |
+------+------+------+----------+------+----------+
3 rows in set (0.00 sec)

mysql> select version();
+----------------+
| version()      |
+----------------+
| 4.1.2-alpha-nt |
+----------------+
1 row in set (0.00 sec)