Description:
(gdb) bt
#0 0x00003fff7d301874 in pthread_kill () from /opt/at9.0-3-beta1/lib64/power8/libpthread.so.0
#1 0x0000000010b85680 in my_write_core (sig=<optimised out>) at /home/danielgb/mysql-5.7/mysys/stacktrace.c:247
#2 0x0000000010368984 in handle_fatal_signal (sig=<optimised out>) at /home/danielgb/mysql-5.7/sql/signal_handler.cc:220
#3 <signal handler called>
#4 row_purge_step (thr=0x10002943b78) at /home/danielgb/mysql-5.7/storage/innobase/row/row0purge.cc:1081
#5 0x0000000010c8fdb4 in que_thr_step (thr=0x10002943b78) at /home/danielgb/mysql-5.7/storage/innobase/que/que0que.cc:1051
#6 que_run_threads_low (thr=<optimised out>) at /home/danielgb/mysql-5.7/storage/innobase/que/que0que.cc:1113
#7 que_run_threads (thr=0x10002943b78) at /home/danielgb/mysql-5.7/storage/innobase/que/que0que.cc:1153
#8 0x0000000010d1d13c in srv_task_execute () at /home/danielgb/mysql-5.7/storage/innobase/srv/srv0srv.cc:2377
#9 srv_worker_thread (arg=<optimised out>) at /home/danielgb/mysql-5.7/storage/innobase/srv/srv0srv.cc:2427
#10 0x00003fff7d2f7efc in ?? () from /opt/at9.0-3-beta1/lib64/power8/libpthread.so.0
#11 0x00003fff7cb1f9a0 in clone () from /opt/at9.0-3-beta1/lib64/power8/libc.so.6
(gdb) up
#1 0x0000000010b85680 in my_write_core (sig=<optimised out>) at /home/danielgb/mysql-5.7/mysys/stacktrace.c:247
247 pthread_kill(my_thread_self(), sig);
(gdb) p sig
$1 = <optimised out>
(gdb) up
#2 0x0000000010368984 in handle_fatal_signal (sig=<optimised out>) at /home/danielgb/mysql-5.7/sql/signal_handler.cc:220
220 my_write_core(sig);
(gdb) up
#3 <signal handler called>
(gdb) up
#4 row_purge_step (thr=0x10002943b78) at /home/danielgb/mysql-5.7/storage/innobase/row/row0purge.cc:1081
1081 if (ib_vector_is_empty(node->undo_recs)) {
(gdb) p * node
$2 = {common = {type = 13, parent = 0x10002943b78, brother = 0x0, val = {data = 0x0, ext = 0, len = 0, type = {prtype = 0, mtype = 0, len = 0, mbminmaxlen = 0}},
val_buf_size = 0}, roll_ptr = 14918173787299343, undo_recs = 0x0, undo_no = 0, rec_type = 14, table = 0x0, cmpl_info = 0, update = 0x3fff34009f20, ref = 0x3fff34009ec8,
row = 0x3fff34009fb8, index = 0x0, heap = 0x10002944938, found_clust = 0, pcur = {btr_cur = {index = 0x10002bc15b8, page_cur = {index = 0x0, rec = 0x0, offsets = 0x0,
block = 0x0}, purge_node = 0x0, left_block = 0x0, thr = 0x0, flag = BTR_CUR_HASH_FAIL, tree_height = 1, up_match = 0, up_bytes = 0, low_match = 1, low_bytes = 0,
n_fields = 1, n_bytes = 0, fold = 13584942333123210352, path_arr = 0x0, rtr_info = 0x0}, latch_mode = 8, old_stored = false, old_rec = 0x0, old_n_fields = 1,
rel_pos = BTR_PCUR_ON, block_when_stored = 0x3fff625667d8, modify_clock = 785, withdraw_clock = 0, pos_state = BTR_PCUR_NOT_POSITIONED, search_mode = PAGE_CUR_LE,
trx_if_known = 0x0, old_rec_buf = 0x0, buf_size = 70}, done = 1, trx_id = 47794}
(gdb) p purge_rec
$5 = <optimised out>
(gdb) p *thr
$6 = {common = {type = 9, parent = 0x100029434e8, brother = 0x0, val = {data = 0x0, ext = 0, len = 0, type = {prtype = 0, mtype = 0, len = 0, mbminmaxlen = 0}}, val_buf_size = 0},
magic_n = 8476583, child = 0x10002943e78, graph = 0x100029434e8, state = QUE_THR_COMPLETED, is_active = 0, run_node = 0x10002943b78, prev_node = 0x10002943b78, resource = 35047,
lock_state = 0, slot = 0x0, thrs = {prev = 0x10002943918, next = 0x10002944018}, trx_thrs = {prev = 0x0, next = 0x0}, queue = {prev = 0x0, next = 0x0}, fk_cascade_depth = 0}
(gdb) p node->undo_recs
$3 = (ib_vector_t *) 0x0
The expansion of ib_vector_is_empty() dereferences node->undo_recs, which is NULL here, causing the SEGV.
undo_recs is set to NULL in row_purge_end(), which is only called from row_purge_step(), at a point after the line where the SEGV happened. row_purge_step() is only called from que_thr_step().
que_thr_step() contains the statement "old_thr->prev_node = node;". row_purge_end() seems to always return the same thr it was passed.
(gdb) up
#5 0x0000000010c8fdb4 in que_thr_step (thr=0x10002943b78) at /home/danielgb/mysql-5.7/storage/innobase/que/que0que.cc:1051
1051 thr = row_purge_step(thr);
(gdb) p *old_thr
$7 = {common = {type = 9, parent = 0x100029434e8, brother = 0x0, val = {data = 0x0, ext = 0, len = 0, type = {prtype = 0, mtype = 0, len = 0, mbminmaxlen = 0}}, val_buf_size = 0},
magic_n = 8476583, child = 0x10002943e78, graph = 0x100029434e8, state = QUE_THR_COMPLETED, is_active = 0, run_node = 0x10002943b78, prev_node = 0x10002943b78, resource = 35047,
lock_state = 0, slot = 0x0, thrs = {prev = 0x10002943918, next = 0x10002944018}, trx_thrs = {prev = 0x0, next = 0x0}, queue = {prev = 0x0, next = 0x0}, fk_cascade_depth = 0}
(gdb) p old_thr
$8 = (que_thr_t *) 0x10002943b78
(gdb) p thr
$9 = (que_thr_t *) 0x10002943b78
Notice that for thr, run_node == prev_node (both are 0x10002943b78).
(gdb) up
#6 que_run_threads_low (thr=<optimised out>) at /home/danielgb/mysql-5.7/storage/innobase/que/que0que.cc:1113
1113 next_thr = que_thr_step(thr);
(gdb) p next_thr
$10 = <optimised out>
(gdb) p trx
$11 = <optimised out>
(gdb) up
#7 que_run_threads (thr=0x10002943b78) at /home/danielgb/mysql-5.7/storage/innobase/que/que0que.cc:1153
1153 que_run_threads_low(thr);
(gdb) p thr
$12 = (que_thr_t *) 0x10002943b78
(gdb) p *thr
$13 = {common = {type = 9, parent = 0x100029434e8, brother = 0x0, val = {data = 0x0, ext = 0, len = 0, type = {prtype = 0, mtype = 0, len = 0, mbminmaxlen = 0}},
val_buf_size = 0}, magic_n = 8476583, child = 0x10002943e78, graph = 0x100029434e8, state = QUE_THR_COMPLETED, is_active = 0, run_node = 0x10002943b78,
prev_node = 0x10002943b78, resource = 35047, lock_state = 0, slot = 0x0, thrs = {prev = 0x10002943918, next = 0x10002944018}, trx_thrs = {prev = 0x0, next = 0x0}, queue = {
prev = 0x0, next = 0x0}, fk_cascade_depth = 0}
Given the above, and since que_thr_step runs in a loop, the following patch (adding a memory barrier) was attempted:
diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc
index 2f7e78e..280d271 100644
--- a/storage/innobase/que/que0que.cc
+++ b/storage/innobase/que/que0que.cc
@@ -1070,6 +1070,7 @@ que_thr_step(
} else {
old_thr->prev_node = node;
}
+ __asm__ volatile ("lwsync");
if (thr) {
ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
It appears other asserts are occurring in que0que.cc, so this is either not the right fix or not the only fix.
How to repeat:
./mtr
federated.federated_server w40 [ fail ]
Test ended at 2016-03-09 14:45:06
CURRENT_TEST: federated.federated_server
mysqltest: At line 334: query 'call p1()' failed: 2013: Lost connection to MySQL server during query