Bug #69328 Encoding problem : 1 database utf8 with 1 table utf8 and 1 table latin1
Submitted: 27 May 2013 16:01 Modified: 30 May 2013 13:48
Reporter: Lucas Pouzac Email Updates:
Status: Verified Impact on me:
None 
Category:Connector / J Severity:S2 (Serious)
Version:5.1.25 OS:Any
Assigned to: Alexander Soklakov CPU Architecture:Any

[27 May 2013 16:01] Lucas Pouzac
Description:
I try to get the same character ('é') from a table in latin1 and utf8 via the driver, but can not get there.

ResultSet.getBytes(...) is correct but no ResultSet.getString(...)

Below is my test case.

How to repeat:
###############################################
# CREATING DATABASE
###############################################
DROP DATABASE encoding;
CREATE DATABASE encoding CHARACTER SET utf8 COLLATE utf8_bin;

use encoding

CREATE TABLE utf8_table (
  id varchar(50) NOT NULL,
  data longtext NOT NULL)
  CHARACTER SET utf8 COLLATE utf8_bin;

SET NAMES utf8;

INSERT INTO utf8_table VALUES ("é", "é");

CREATE TABLE latin1_table (
  id varchar(50) NOT NULL,
  data longtext NOT NULL)
  CHARACTER SET latin1 COLLATE latin1_bin;

SET NAMES latin1;

INSERT INTO latin1_table VALUES ("é", "é");

GRANT ALL PRIVILEGES ON encoding.* TO 'encoding'@'localhost' IDENTIFIED BY 'encoding' WITH GRANT OPTION;
GRANT ALL PRIVILEGES ON encoding.* TO 'encoding'@'%' IDENTIFIED BY 'encoding' WITH GRANT OPTION;

###############################################
# CREATING DATABASE - RESULTS OK
###############################################

SET NAMES utf8;
SELECT * FROM utf8_table;
+----+------+
| id | data |
+----+------+
| é  | é    |
+----+------+

SELECT * FROM latin1_table;
+------+------+
| id   | data |
+------+------+
| é   | é   |
+------+------+

SET NAMES latin1;

SELECT * FROM latin1_table;
+----+------+
| id | data |
+----+------+
| é  | é    |
+----+------+

SELECT * FROM utf8_table;
+----+------+
| id | data |
+----+------+
| �   | �     |
+----+------+

###############################################
# JAVA USE CASE (with mysql-connector-java-5.1.25
###############################################

import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;

public class TestMysqlEncoding {

    private String dbURL = "";

    private String user = "";

    private String password = "";

    private java.sql.Connection dbConnect = null;

    private java.sql.Statement dbStatement = null;

    public TestMysqlEncoding(String url, String user, String password) {
        this.dbURL = url;
        this.user = user;
        this.password = password;
    }

    public Boolean connect() {
        try {
            Class.forName("com.mysql.jdbc.Driver").newInstance();
            System.out.println(dbURL);
            this.dbConnect = DriverManager.getConnection(this.dbURL, this.user, this.password);
            this.dbStatement = this.dbConnect.createStatement();
            return true;
        } catch (Exception ex) {
            System.err.println(ex);
        }
        return false;
    }

    public ResultSet exec(String sql) {
        try {
            return this.dbStatement.executeQuery(sql);
        } catch (Exception ex) {
            System.err.println(ex);
        }
        return null;
    }

    public int execUpdate(String sql) {
        try {
            return this.dbStatement.executeUpdate(sql);
        } catch (Exception ex) {
            System.err.println(ex);
        }
        return -1;
    }

    public void close() {
        try {
            this.dbStatement.close();
            this.dbConnect.close();
            this.dbConnect.close();
        } catch (SQLException ex) {
            System.err.println(ex);
        }
    }

    public static void main(String[] args) {
        String[] tablesName = { "utf8_table", "latin1_table" };

        for (String tableName : tablesName) {
            System.out.println("***************************");
            System.out.println("** " + tableName);
            System.out.println("***************************");

            String[] paramsMysql = {
                    "useUnicode=true&characterEncoding=utf8",
                    "useUnicode=true&characterEncoding=latin1" };

            for (String paramMysql : paramsMysql) {
                TestMysqlEncoding mysqlCli = new TestMysqlEncoding("jdbc:mysql://localhost:3306/encoding?" + paramMysql, "encoding", "encoding");
                try {
                    if (mysqlCli.connect()) {
                        try {
                            ResultSet rs = mysqlCli.exec("SELECT * FROM " + tableName);
                            if (rs != null) {
                                while (rs.next()) {
                                    System.out.println("column1 value : " + rs.getString(1)
                                            + " (bytes : [" + bytesToString(rs.getBytes(1)) + "])"
                                            + ", column2 value " + rs.getString(2)
                                            + " (bytes : [" + bytesToString(rs.getBytes(2)) + "])");
                                }
                            }
                        } catch (Exception ex) {
                            System.err.println(ex);
                        }
                    } else {
                        System.err.println("Mysql connection failed !!!");
                    }
                } finally {
                    mysqlCli.close();
                }
            }
        }
    }

    private static String bytesToString(byte[] bytes) {
        String result = "";
        boolean first = true;
        for (byte b : bytes) {
            if (!first) {
                result += ",";
            }
            result += b;
            first = false;
        }
        return result;
    }
}

###############################################
# JAVA USE CASE - RESULTS KO
###############################################

***************************
** utf8_table
***************************
jdbc:mysql://localhost:3306/encoding?useUnicode=true&characterEncoding=utf8
column1 value : é (bytes : [-61,-87]), column2 value é (bytes : [-61,-87])
jdbc:mysql://localhost:3306/encoding?useUnicode=true&characterEncoding=latin1
column1 value : é (bytes : [-61,-87]), column2 value é (bytes : [-61,-87])
***************************
** latin1_table
***************************
jdbc:mysql://localhost:3306/encoding?useUnicode=true&characterEncoding=utf8
column1 value : é (bytes : [-61,-87]), column2 value é (bytes : [-61,-87])
jdbc:mysql://localhost:3306/encoding?useUnicode=true&characterEncoding=latin1
column1 value : é (bytes : [-61,-87]), column2 value é (bytes : [-61,-87])

Suggested fix:
It's just an idea, maybe it's not possible in all case.

Encoding set to thisRow.getString(internalColumnIndex, encoding, this.connection) it's not metadata.getCharacterSet() but characterSetResult between client and server.
[27 May 2013 21:50] Mark Matthews
> Suggested fix:
> It's just an idea, maybe it's not possible in all case.
> 
> Encoding set to thisRow.getString(internalColumnIndex, encoding, this.connection) it's not > metadata.getCharacterSet() but characterSetResult between client and server.

No, it's metadata.getCharacterSet(), which *is* characterSetResult, which by default is null with the JDBC driver, which means the server returns whatever character set it thinks is correct, and the driver does the decoding.

I'm guessing in your case, MySQL is lying about what character encoding your results are actually encoded in, or as has happened in the past, encodes them one way, but reports them differently.
[30 May 2013 13:48] Alexander Soklakov
Hi Lucas,

Thanks for your report. Verified as described.