Description:
An update statement which would violate a unique key can crash full cluster (if query received from statement binlog [eg from standard mysql master]) and can crash a single node if query ran directly on cluster.
See How to repeat for full process.
...
DBTUX 040035 040037 050031
DBTUP 070106
DBTUX 040063 050036
DBTUP 031154 030398 030555 030558
--------------- Signal ----------------
r.bn: 247 "DBLQH", r.proc: 2, r.sigId: 936524 gsn: 316 "LQHKEYREQ" prio: 1
s.bn: 245 "DBTC", s.proc: 2, s.sigId: 936521 length: 19 trace: 1 #sec: 0 fragInf: 0
ClientPtr = H'00020818 hashValue = H'186c72d0 tcBlockRef = H'00f50002
transId1 = H'00000010 transId2 = H'00500400 savePointId = H'00000000
Op: 1 Lock: 0 Flags: ScanTakeOver CommitAckMarker NoDisk ScanInfo/noFiredTriggers: H'1
AttrLen: 5 (5 in this) KeyLen: 1 TableId: 9 SchemaVer: 1
FragId: 0 ReplicaNo: 0 LastReplica: 1 NextNodeId: 3
ApiRef: H'80050004 ApiOpRef: H'00000014
KeyInfo: H'00000003
AttrInfo: H'00030005 H'30303004 H'00000038 H'fff20004 H'00000001
Either way, both crash in the same location in the trace in ./storage/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp ;
547 void
548 Dbtup::fireImmediateTriggers(KeyReqStruct *req_struct,
549 DLList<TupTriggerData>& triggerList,
550 Operationrec* const regOperPtr)
551 {
552 TriggerPtr trigPtr;
553 triggerList.first(trigPtr);
554 while (trigPtr.i != RNIL) {
555 jam();
556 if (trigPtr.p->monitorAllAttributes ||
557 trigPtr.p->attributeMask.overlaps(req_struct->changeMask)) {
558 jam();
559 executeTrigger(req_struct,
560 trigPtr.p,
561 regOperPtr);
562 }//if
563 triggerList.next(trigPtr);
564 }//while
565 }//Dbtup::fireImmediateTriggers()
How to repeat:
This does not crash when disk data is not used. Received this output;
# mysql -S /tmp/master.sock -u root db1 -e "update t1 set c3 = '0008';";
ERROR 1062 (23000) at line 1: Duplicate entry '1-0008' for key 'c1c3'
# mysql -S /tmp/slave.sock -u root db1 -e "update t1 set c3 = '0008';";
ERROR 1062 (23000) at line 1: Duplicate entry '' for key '*UNKNOWN*'
Also does not crash via replication when row based repl used. However still crashed one node when UPDATE run directly.
Full repeat steps;
mysqlbasedir=/usr/local/mysql5122/
mysqlbindir=${mysqlbasedir}/bin/
masterdatadir=/usr/local/mysql5122/data/
slavedatadir=/usr/local/mysqlcluster/mysqldata1/
ndbd1_dir=/usr/local/mysqlcluster/ndbd1/
ndbd2_dir=/usr/local/mysqlcluster/ndbd2/
ndb_mgm_config=/usr/local/mysqlcluster/config51_2dn_normal.ini
ndb_mgm -e "shutdown";
rm -rf ${ndbd1_dir}/
mkdir ${ndbd1_dir}/
rm -rf ${ndbd2_dir}/
mkdir ${ndbd2_dir}/
mysqladmin -S /tmp/master.sock -u root shutdown
mysqladmin -S /tmp/slave.sock -u root shutdown
sleep 1;
cd ${mysqlbasedir}
bin/mysqld_safe --no-defaults --user=mysql \
--server-id=1 --log-bin=master-bin --log-bin-index=master-index --binlog-format=statement \
--socket=/tmp/master.sock &
sleep 4;
# drop any existing db, and create it again
mysql -S /tmp/master.sock -u root -e "set sql_log_bin=0; drop database if exists db1; create database db1;";
# create myisam table for purpose of replicating it only.
mysql -S /tmp/master.sock -u root db1 -e "set sql_log_bin=0; CREATE TABLE t1 (id int(10) unsigned NOT NULL AUTO_INCREMENT,c1 int(10) unsigned NOT NULL,c2 varchar(45) DEFAULT NULL,c3 varchar(32) NOT NULL,PRIMARY KEY (id),UNIQUE KEY c1c3 (c1,c3)) engine=myisam;";
mysql -S /tmp/master.sock -u root -e "GRANT REPLICATION SLAVE ON *.* TO 'slave_user'@'localhost' IDENTIFIED BY 'slave_password';";
mysql -S /tmp/master.sock -u root -e "RESET MASTER;";
# start ndb cluster, slave mysqld, and create disk data table spaces etc.
${mysqlbindir}/ndb_mgmd -f ${ndb_mgm_config}
${mysqlbindir}/ndbd --initial
${mysqlbindir}/ndbd --initial
sleep 25;
cd ${mysqlbasedir}
bin/mysqld_safe --no-defaults --user=mysql --port=4406 --socket=/tmp/slave.sock \
--datadir=${slavedatadir} --server-id=2 \
--skip-slave-start \
--ndbcluster --ndb-connectstring=localhost &
sleep 25;
mysql -S /tmp/slave.sock -u root -e "set sql_log_bin=0; drop database if exists db1; create database db1;";
mysql -S /tmp/slave.sock -u root -e "RESET SLAVE;";
mysql -S /tmp/slave.sock -u root -e "CHANGE MASTER TO MASTER_HOST='localhost', MASTER_PORT=3306, MASTER_USER='slave_user', MASTER_PASSWORD='slave_password';";
mysql -S /tmp/slave.sock -u root -e "CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=106;";
mysql -S /tmp/slave.sock -u root -e "START SLAVE;"
sleep 15;
mysql -S /tmp/slave.sock -u root db1 -e "CREATE LOGFILE GROUP lg_1 ADD UNDOFILE 'undo_1.dat' INITIAL_SIZE 16M UNDO_BUFFER_SIZE 2M ENGINE NDB;";
mysql -S /tmp/slave.sock -u root db1 -e "CREATE TABLESPACE ts_1 ADD DATAFILE 'data_1.dat' USE LOGFILE GROUP lg_1 INITIAL_SIZE 32M ENGINE NDB;";
#===
# create ndb table in tablespace.
mysql -S /tmp/slave.sock -u root db1 -e "CREATE TABLE t1 (id int(10) unsigned NOT NULL AUTO_INCREMENT,c1 int(10) unsigned NOT NULL,c2 varchar(45) DEFAULT NULL,c3 varchar(32) NOT NULL,PRIMARY KEY (id),UNIQUE KEY c1c3 (c1,c3)) TABLESPACE ts_1 STORAGE DISK ENGINE=ndbcluster;";
# insert into master
mysql -S /tmp/master.sock -u root db1 -e "insert into t1 values(NULL,1,'test1','0005');";
mysql -S /tmp/master.sock -u root db1 -e "insert into t1 values(NULL,1,'test1','0006');";
mysql -S /tmp/master.sock -u root db1 -e "insert into t1 values(NULL,1,'test1','0007');";
#mysql -S /tmp/master.sock -u root db1 -e "select * from t1;";
#mysql -S /tmp/slave.sock -u root db1 -e "select * from t1;";
#================= To crash via replication (whole cluster)
mysql -S /tmp/master.sock -u root db1 -e "update t1 set c3 = '0008';";
#(recover nodes to do the single node crash)
#${mysqlbindir}/ndbd
#${mysqlbindir}/ndbd
#================= To crash only single node (not whole cluster) run this direct on slave
mysql -S /tmp/slave.sock -u root db1 -e "update t1 set c3 = '0008';";
Suggested fix:
- Do not crash full cluster when statement base replication used.
- Do not crash node when violating unique UPDATE is run directly on API node.
ndb_error report tarball will be attached. First crash is full cluster via replication, second crash is statement run directly.