Bug #67225 binary data truncated when using UNION in prepared statement
Submitted: 14 Oct 2012 15:02 Modified: 29 Apr 2014 8:39
Reporter: Olivier Lefevre Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: Prepared statements Severity:S2 (Serious)
Version:5.6.10 OS:Microsoft Windows (XP SP3 32bit)
Assigned to: CPU Architecture:Any
Tags: BINARY, jdbc, PreparedStatement

[14 Oct 2012 15:02] Olivier Lefevre
Description:
Insert statements involving (I think; hard to say for sure since EXPLAIN does not work with INSERT) temporary tables cause data corruption with binary types and server-side prepared statements. 

In the test program below the binary id inserted is 0542390000000000000000000000000000 and not 054239BD372600B3A4E2119A15FC4B05C5 as it should be whenever argv[4] is true and argv[5] >= 2.

The MySQL Server version is 5.5.28. Java is Oracle 1.7.0_07-b10.

How to repeat:
import java.sql.*;

import javax.sql.DataSource;
import javax.xml.bind.DatatypeConverter;

import com.mysql.jdbc.jdbc2.optional.MysqlDataSource;

public class Bug1 {

    public static final void main(String[] argv) throws SQLException {
        DataSource ds = createDataSource(argv[0], Integer.parseInt(argv[1]), argv[2], argv[3], Boolean.parseBoolean(argv[4]));
        Connection conn = ds.getConnection();
        int ns = Integer.parseInt(argv[5]);
        try { test(conn, ns); }
        finally { if (conn != null) conn.close(); }
    }

    private static void test(Connection conn, int ns) throws SQLException {
        Statement stmt = conn.createStatement();
        stmt.execute("CREATE DATABASE IF NOT EXISTS test");
        stmt.execute("DROP TABLE IF EXISTS test.t1, test.t2");
        stmt.execute("CREATE TABLE test.t1 (id BINARY(17) NOT NULL)");
        stmt.execute("CREATE TABLE test.t2 (id BINARY(17) NOT NULL)");
        stmt.execute("DROP PROCEDURE IF EXISTS test.p1");
        stmt.execute("CREATE PROCEDURE test.p1(OUT out_id BINARY(17)) BEGIN SET @id = UNHEX('054239BD372600B3A4E2119A15FC4B05C5'); INSERT INTO test.t1 (id) VALUES (@id); SELECT @id INTO out_id; END");
        String sql = "{CALL test.p1(?)}";
        System.out.println(sql);
        CallableStatement cs = conn.prepareCall(sql);
        cs.registerOutParameter(1, Types.BINARY);
        cs.executeUpdate();
        byte[] id = cs.getBytes(1);
        cs.close();
        System.out.println("=> " + bin2hex(id));
        PreparedStatement ps = conn.prepareStatement(createSelect(ns));
        for (int i = 1; i <= Math.max(1, ns); i++)
            ps.setBytes(i, id);
        ps.executeUpdate();
        ps.close();
        ResultSet rs = stmt.executeQuery("SELECT id FROM test.t2");
        rs.first();
        System.out.println("=> " + bin2hex(rs.getBytes(1)));
        rs.close();
        stmt.close();
    }

    private static DataSource createDataSource(String server, int port, String user, String pwd, boolean ss) throws SQLException {
        MysqlDataSource mds = new MysqlDataSource();
        mds.setServerName(server);
        mds.setPortNumber(port);
        mds.setUser(user);
        mds.setPassword(pwd);
        mds.setUseServerPrepStmts(ss);
        return mds;
    }

    private static String createSelect(int ns) {
        String sql = "INSERT INTO test.t2 (id)";
        if (ns == 0)
            sql += " VALUES (?)";
        else {
            for (int i = 0; i < ns; i++) {
                if (i > 0) sql += " UNION";
                sql += " SELECT ? AS id"; } }
        System.out.println(sql);
        return sql;
    }

    private static String bin2hex(byte[] bin) {
        return DatatypeConverter.printHexBinary(bin);
    }
}

Suggested fix:
Do not use server-side prepared statements.
[14 Nov 2012 0:33] Todd Farmer
I'm unable to reproduce the reported results using MySQL 5.5.13 and Connector/J 5.1.22.  I've modified the test program only slightly (to avoid using javax.xml package and to change from main() method call):

   public static final void test67225(String[] argv) throws SQLException {
        DataSource ds = createDataSource(argv[0], Integer.parseInt(argv[1]), argv[2], argv[3], Boolean.parseBoolean(argv[4]));
        Connection conn = ds.getConnection();
        int ns = Integer.parseInt(argv[5]);
        try { test(conn, ns); }
        finally { if (conn != null) conn.close(); }
    }

    private static void test(Connection conn, int ns) throws SQLException {
        Statement stmt = conn.createStatement();
        stmt.execute("CREATE DATABASE IF NOT EXISTS test");
        stmt.execute("DROP TABLE IF EXISTS test.t1, test.t2");
        stmt.execute("CREATE TABLE test.t1 (id BINARY(17) NOT NULL)");
        stmt.execute("CREATE TABLE test.t2 (id BINARY(17) NOT NULL)");
        stmt.execute("DROP PROCEDURE IF EXISTS test.p1");
        stmt.execute("CREATE PROCEDURE test.p1(OUT out_id BINARY(17)) BEGIN SET @id = UNHEX('054239BD372600B3A4E2119A15FC4B05C5'); INSERT INTO test.t1 (id) VALUES (@id); SELECT @id INTO out_id; END");
        String sql = "{CALL test.p1(?)}";
        System.out.println(sql);
        CallableStatement cs = conn.prepareCall(sql);
        cs.registerOutParameter(1, Types.BINARY);
        cs.executeUpdate();
        byte[] id = cs.getBytes(1);
        cs.close();
        System.out.println("=> " + bytesToHexString(id));
        PreparedStatement ps = conn.prepareStatement(createSelect(ns));
        for (int i = 1; i <= Math.max(1, ns); i++)
            ps.setBytes(i, id);
        ps.executeUpdate();
        ps.close();
        ResultSet rs = stmt.executeQuery("SELECT id FROM test.t2");
        rs.first();
        System.out.println("=> " + bytesToHexString(rs.getBytes(1)));
        rs.close();
        stmt.close();
    }

    private static DataSource createDataSource(String server, int port, String user, String pwd, boolean ss) throws SQLException {
        MysqlDataSource mds = new MysqlDataSource();
        mds.setServerName(server);
        mds.setPortNumber(port);
        mds.setUser(user);
        mds.setPassword(pwd);
        mds.setUseServerPrepStmts(ss);
        return mds;
    }

    private static String createSelect(int ns) {
        String sql = "INSERT INTO test.t2 (id)";
        if (ns == 0)
            sql += " VALUES (?)";
        else {
            for (int i = 0; i < ns; i++) {
                if (i > 0) sql += " UNION";
                sql += " SELECT ? AS id"; } }
        System.out.println(sql);
        return sql;
    }

    public static String bytesToHexString(byte[] bytes) {  
        StringBuilder sb = new StringBuilder(bytes.length * 2);  
      
        java.util.Formatter formatter = new java.util.Formatter(sb);  
        for (byte b : bytes) {  
            formatter.format("%02x", b);  
        }  
      
        return sb.toString().toUpperCase();  
    } 

This is invoked using:

test67225(new String[] {"localhost", "3306", "root", "", "true", "2"});

The results are:

{CALL test.p1(?)}
=> 054239BD372600B3A4E2119A15FC4B05C5
INSERT INTO test.t2 (id) SELECT ? AS id UNION SELECT ? AS id
=> 054239BD372600B3A4E2119A15FC4B05C5

Please confirm whether the modified test case continues to exhibit the data truncation reported.
[14 Nov 2012 0:39] Todd Farmer
I will note the following:

1.  The data is reported to be truncated starting with the first alpha character.  This is indicative of potential MySQL silent type conversion data truncation.

2.  The test case leverages untyped data being returned from a SELECT statement.  Unlike data being returned from a table, this statement forces MySQL to "guess" at what data type is being represented, and it's entirely possible that it guesses incorrectly.  The proper solution for this would be to explicitly cast the result from the SELECT statement, like so:

SELECT CAST(? AS BINARY(17)) AS id UNION SELECT CAST(? AS BINARY(17)) AS id ...
[15 Nov 2012 0:27] Olivier Lefevre
Yes, the modified version still exhibits this behaviour.

I don't understand your comment that "The test case leverages untyped data being returned from a SELECT statement." On the way in, the binary data is inserted (the UNION statement in the test is an INSERT, not a SELECT) by means of a prepared statement and setBytes does set the type to BINARY; on the way out it is read from a table. Thus I don't see where untyped data come into the picture.
[21 Nov 2012 3:02] Olivier Lefevre
Any news?
[22 Nov 2012 21:22] Todd Farmer
The untyped data comes from the INSERT INTO test.t2 (id) SELECT ... statement.  The data returned from the SELECT portion of this is not explicitly typed.

As noted, execution of the modified test case produces the following for me:

{CALL test.p1(?)}
=> 054239BD372600B3A4E2119A15FC4B05C5
INSERT INTO test.t2 (id) SELECT ? AS id UNION SELECT ? AS id
=> 054239BD372600B3A4E2119A15FC4B05C5

You indicate you get something different.  Please provide the results of execution of the modified test case, along with output of SHOW GLOBAL VARIABLES executed against the same server being used.
[23 Nov 2012 3:40] Todd Farmer
Two more pieces of information to collect:

1.  Please provide the output for SHOW CREATE TABLE test.t2\G  (specifically, I'm looking to see whether InnoDB or MyISAM is being used).

2.  Try replacing the getSelect() method with the following:

    private static String createSelect(int ns) {
        String sql = "INSERT INTO test.t2 (id) VALUES";
        if (ns == 0)
            sql += " (?)";
        else {
            for (int i = 0; i < ns; i++) {
                if (i > 0) sql += ",";
                sql += " (?)"; } }
        System.out.println(sql);
        return sql;
    }

This replaces the INSERT ... SELECT syntax (the *only* condition you report triggering the behavior) with multi-value INSERT.  Rewriting the statement to do a direct INSERT instead of INSERT ... SELECT will eliminate the possibility of multiple lossy type conversions.
[23 Nov 2012 7:10] Olivier Lefevre
1. I see now where you claim the untyped variable comes from. Replacing "?" with "CAST(? AS BINARY(17))" in the INSERT statement does eliminate the problem. I am still surprised by this, though, because of the prepared statement: shouldn't the call to setBytes effectively set the type of the variables? As noted in my first post, it does when using JDBC-generated and not server-side prepared statements; why the difference? Furthermore the correct type can also be inferred from the destination, i.e., from the type of the test.id column; why isn't it? In summary perhaps MySQL does have an untyped variable issue here but there doesn't seem to be any reason why it should.

2. This is a highly artificial example obtained from distillation of the real program, in which it isn't really possible to rewrite this as a multi-valued INSERT because the SELECT parts are complicated statements with WHERE clauses etc.

3. SHOW CREATE TABLE test.t2\G returns
Create Table: CREATE TABLE `t2` (
  `id` binary(17) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
[26 Nov 2012 18:41] Todd Farmer
The difference between server-side prepared statement execution and client-side emulated prepared statement execution can be observed by turning on the server general query log.  Here's the result with useServerPrepStmts enabled:

		    2 Prepare	INSERT INTO test.t2 (id) SELECT ? AS id UNION SELECT ? AS id
		    2 Execute	INSERT INTO test.t2 (id) SELECT 'B9½7&\0³¤âšüKÅ' AS id UNION SELECT 'B9½7&\0³¤âšüKÅ' AS id

Here it is disabled:

		    3 Query	INSERT INTO test.t2 (id) SELECT _binary'B9½7&\0³¤âšüKÅ' AS id UNION SELECT _binary'B9½7&\0³¤âšüKÅ' AS id

When client-side emulated prepared statements are used, the SQL sent includes the _binary flag to indicate binary data.  That's not present in server-side prepared statements.  It's worth noting that server-side prepared statements are disabled by default because of problems and limitations such as these.  For almost all use cases, the MySQL server implementation of prepared statements adds no value over the client-side emulated prepared statements in Connector/J.  I would encourage affected users to consider whether server-side prepared statements are truly required.

Your point about the server knowing the data type of the parameter passed to the SELECT statement is valid.  It would make a potentially useful (server-side) feature request to ensure that the data type returned by SELECT statements against a literal be typed according to the parameter passed as an argument. 

I would like to be able to reproduce the behavior you describe, but so far, I've been unable to do so.  I have no explanation for the difference in behavior you observe with the same code.
[30 Nov 2012 12:29] Olivier Lefevre
Thanks a lot for the detailed diagnostic. I also do not know why you can't reproduce the behaviour: I think I nailed down the exact version of all the parts involved in my original posting. 

I turned on general logging and while I see the same output as you in the server-side case, in the client-side case I see:

3 Query	INSERT INTO test.t2 (id) SELECT x'054239BD372600B3A4E2119A15FC4B05C5' AS id UNION SELECT x'054239BD372600B3A4E2119A15FC4B05C5' AS id

This makes sense (this is how I log statements with binary variables in my own log file) but it's definitely not what you reported.
[30 Nov 2012 16:43] Todd Farmer
Hello Olivier,

The difference noted in how the client-side emulated prepared statements are expressed is triggered by character-set related conditions.  Here's the relevant code from PreparedStatement.java:

					if (this.connection.isNoBackslashEscapesSet()
							|| (this.connection.getUseUnicode() 
									&& connectionEncoding != null
									&& CharsetMapping.isMultibyteCharset(connectionEncoding)
									&& !this.connection.parserKnowsUnicode())) {
						hexEscape = true;
					}
...
				if (hexEscape) {
					packet.writeStringNoNull("x");
				} else if (this.connection.getIO().versionMeetsMinimum(4, 1, 0)) {
					packet.writeStringNoNull("_binary");
				}

If you can provide the server configuration file used by MySQL, I will test with that to see if server-side character set defaults may affect server-side prepared statement execution in a way that allows for data truncation as you have described.

Thanks,

Todd
[11 Dec 2012 21:45] Olivier Lefevre
Sorry for the delay. I guess it is the character-related server directives you are interested in. They are:

[mysql]
default-character-set=utf8
[mysqld]
character-set-server=utf8
!includedir "c:\\Programme\\MySQL\\MySQL Server 5.5\\conf"

And in the conf director there is this charset.cnf file:

[client]
default-character-set = utf8
[mysql]
default-character-set = utf8
[mysqld]
init_connect='SET collation_connection = utf8_general_ci; SET NAMES utf8;'
character-set-server=utf8
collation-server=utf8_general_ci
character-set-database=utf8
collation-database=utf8_general_ci
skip-character-set-client-handshake

Lastly the section of context.xml file in which the connection pool is configured has this entry:

connectionProperties="characterEncoding=utf8;[...other properties...]"
[15 May 2013 12:14] Jess Balint
test case fails with truncated data when server character_set_server set to UTF8. verified against C/J HEAD and MySQL 5.6.10
[15 May 2013 12:15] Jess Balint
test case for Bug#67225

Attachment: Bug67225.java (text/x-java), 1.15 KiB.

[15 May 2013 18:33] Jess Balint
C test case for Bug#67225

Attachment: bug67225.c (text/x-csrc), 1.59 KiB.

[15 May 2013 18:35] Jess Balint
Reassigned as server bug, verified against 5.6.10 with attached C test case. Query plan shows temp tables are used for UNION processing.
[15 May 2013 21:39] Jess Balint
The expected outcome of the C test case is to result in hex(x) from the inserted row showing 0x"5080". The truncated result is 0x"5000".