Description:
In our application, we create 1000 sessions that we keep reusing. Also, we have a daemon thread that periodically takes a write lock on a scan operation and perform some tasks accordingly.
We were using 7.5.6 which was working perfectly fine, but upon upgrading NDB to 7.5.8, we get a SIGSEGV error quite frequently, the application runs for a couple of minutes and then crash.
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x00007f5ba1bd851a, pid=11455, tid=0x00007f5bbf8b3700
#
# JRE version: Java(TM) SE Runtime Environment (8.0_151-b12) (build 1.8.0_151-b12)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.151-b12 mixed mode linux-amd64 compressed oops)
# Problematic frame:
# C [libndbclient.so.6.1.0+0x12951a] NdbOperation::insertKEYINFO_NdbRecord(char const*, unsigned int)+0xda
STACKTRACE
============================
Stack: [0x00007f5bbf7b3000,0x00007f5bbf8b4000], sp=0x00007f5bbf8b2220, free space=1020k
Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
C [libndbclient.so.6.1.0+0x12951a] NdbOperation::insertKEYINFO_NdbRecord(char const*, unsigned int)+0xda
C [libndbclient.so.6.1.0+0x129aa6] NdbOperation::buildSignalsNdbRecord(unsigned int, unsigned long long, unsigned int const*)+0x4d6
C [libndbclient.so.6.1.0+0x12cdda] NdbScanOperation::takeOverScanOpNdbRecord(NdbOperation::OperationType, NdbTransaction*, NdbRecord const*, char*, unsigned char const*, NdbOperation::OperationOptions const*, un
signed int)+0x29a
C [libndbclient.so.6.1.0+0x12cefc] NdbScanOperation::lockCurrentTuple(NdbTransaction*, NdbRecord const*, char*, unsigned char const*, NdbOperation::OperationOptions const*, unsigned int)+0x4c
C [libndbclient.so.6.1.0+0x1fb8a3] Java_com_mysql_ndbjtie_ndbapi_NdbScanOperation_lockCurrentTuple__Lcom_mysql_ndbjtie_ndbapi_NdbTransaction_2Lcom_mysql_ndbjtie_ndbapi_NdbRecordConst_2Ljava_nio_ByteBuffer_2_3BLc
om_mysql_ndbjtie_ndbapi_NdbOperation_00024OperationOptionsConst_2I+0x123
J 738 com.mysql.ndbjtie.ndbapi.NdbScanOperation.lockCurrentTuple(Lcom/mysql/ndbjtie/ndbapi/NdbTransaction;Lcom/mysql/ndbjtie/ndbapi/NdbRecordConst;Ljava/nio/ByteBuffer;[BLcom/mysql/ndbjtie/ndbapi/NdbOperation$Ope
rationOptionsConst;I)Lcom/mysql/ndbjtie/ndbapi/NdbOperationConst; (0 bytes) @ 0x00007f5ba92a7566 [0x00007f5ba92a7440+0x126]
J 1003 C2 com.mysql.clusterj.tie.NdbRecordScanResultDataImpl.next()Z (231 bytes) @ 0x00007f5ba93a7cb0 [0x00007f5ba93a78e0+0x3d0]
J 1078 C2 com.mysql.clusterj.core.query.QueryDomainTypeImpl.getResultList(Lcom/mysql/clusterj/core/spi/QueryExecutionContext;JJLcom/mysql/clusterj/Query$Ordering;[Ljava/lang/String;)Ljava/util/List; (130 bytes) @
0x00007f5ba93fd2e0 [0x00007f5ba93fcae0+0x800]
J 1085 C2 com.mysql.clusterj.core.query.QueryImpl.getResultList()Ljava/util/List; (56 bytes) @ 0x00007f5ba93d48e4 [0x00007f5ba93d4880+0x64]
j se.kth.ndb.test.SIGSEGVTest.runTest()V+128
j se.kth.ndb.test.SIGSEGVTest.start([Ljava/lang/String;)V+18
j se.kth.ndb.test.Main.main([Ljava/lang/String;)V+8
v ~StubRoutines::call_stub
V [libjvm.so+0x693e76] JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*)+0x1056
V [libjvm.so+0x6d5292] jni_invoke_static(JNIEnv_*, JavaValue*, _jobject*, JNICallType, _jmethodID*, JNI_ArgumentPusher*, Thread*)+0x362
V [libjvm.so+0x6f1afa] jni_CallStaticVoidMethod+0x17a
C [libjli.so+0x80ff] JavaMain+0x81f
C [libpthread.so.0+0x8184] start_thread+0xc4
Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)
J 738 com.mysql.ndbjtie.ndbapi.NdbScanOperation.lockCurrentTuple(Lcom/mysql/ndbjtie/ndbapi/NdbTransaction;Lcom/mysql/ndbjtie/ndbapi/NdbRecordConst;Ljava/nio/ByteBuffer;[BLcom/mysql/ndbjtie/ndbapi/NdbOperation$Ope
rationOptionsConst;I)Lcom/mysql/ndbjtie/ndbapi/NdbOperationConst; (0 bytes) @ 0x00007f5ba92a74ec [0x00007f5ba92a7440+0xac]
J 1003 C2 com.mysql.clusterj.tie.NdbRecordScanResultDataImpl.next()Z (231 bytes) @ 0x00007f5ba93a7cb0 [0x00007f5ba93a78e0+0x3d0]
J 1078 C2 com.mysql.clusterj.core.query.QueryDomainTypeImpl.getResultList(Lcom/mysql/clusterj/core/spi/QueryExecutionContext;JJLcom/mysql/clusterj/Query$Ordering;[Ljava/lang/String;)Ljava/util/List; (130 bytes) @
0x00007f5ba93fd2e0 [0x00007f5ba93fcae0+0x800]
J 1085 C2 com.mysql.clusterj.core.query.QueryImpl.getResultList()Ljava/util/List; (56 bytes) @ 0x00007f5ba93d48e4 [0x00007f5ba93d4880+0x64]
j se.kth.ndb.test.SIGSEGVTest.runTest()V+128
j se.kth.ndb.test.SIGSEGVTest.start([Ljava/lang/String;)V+18
j se.kth.ndb.test.Main.main([Ljava/lang/String;)V+8
v ~StubRoutines::call_stub
How to repeat:
We tried to reproduce the error using a simple application as follows:
1) create a table test_sigsegv
create table `test_sigsegv` (`id` int, `col1` int, `col2` varchar(3000), PRIMARY KEY(`id`), KEY `col1_index` (`col1`));
2) create a session pool with 1000 sessions by default
3) populate the table with 45 rows
4) loop:
session = getRandomSession()
for i in num_rows:
start_tx
write_lock
scan_op(i)
end_tx
The code can be found in this git repo:
https://github.com/smkniazi/clusterj-memory-usage-test
To run the test,
1) clone and compile
2) ./run -dbHost NDB_ADDRESS -schema YOUR_DATABASE -sessions 4000
and let it run for a while, eventually within seconds of a couple of minutes it will fail with a sigsegv.
Code Snippet
=====================================================
import com.mysql.clusterj.ClusterJHelper;
import com.mysql.clusterj.LockMode;
import com.mysql.clusterj.Query;
import com.mysql.clusterj.Session;
import com.mysql.clusterj.SessionFactory;
import com.mysql.clusterj.query.Predicate;
import com.mysql.clusterj.query.QueryBuilder;
import com.mysql.clusterj.query.QueryDomainType;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import java.util.List;
import java.util.Properties;
import java.util.Random;
public class SIGSEGVTest {
@Option(name = "-schema", usage = "DB schemma name. Default is test")
private String schema = "test";
@Option(name = "-dbHost", usage = "com.mysql.clusterj.connectstring. Default is bbc2")
private String dbHost = "bbc2.sics.se";
@Option(name = "-rows", usage = "Num of rows in the table. Default 45.")
private int num_rows = 45;
@Option(name = "-sessions", usage = "Num of sessions created. Default 1000.")
private int num_sessions = 1000;
@Option(name = "-help", usage = "Print usages")
private boolean help = false;
private SessionFactory sf;
private Session simpleSessionPool[];
private Random rand = new Random();
private void setUpDBConnection() throws Exception {
Properties props = new Properties();
props.setProperty("com.mysql.clusterj.connectstring", dbHost);
props.setProperty("com.mysql.clusterj.database", schema);
props.setProperty("com.mysql.clusterj.connect.retries", "4");
props.setProperty("com.mysql.clusterj.connect.delay", "5");
props.setProperty("com.mysql.clusterj.connect.verbose", "1");
props.setProperty("com.mysql.clusterj.connect.timeout.before", "30");
props.setProperty("com.mysql.clusterj.connect.timeout.after", "20");
props.setProperty("com.mysql.clusterj.max.transactions", "1024");
props.setProperty("com.mysql.clusterj.connection.pool.size", "1");
sf = ClusterJHelper.getSessionFactory(props);
createSessions();
}
private void populateDB() throws Exception {
Session session = sf.getSession();
session.currentTransaction().begin();
for (int i = 0; i < num_rows; i++) {
Table.SIGTable row = session.newInstance(Table.SIGTable.class);
row.setId(i);
row.setCol1(i+1);
row.setCol2("test"+i);
session.savePersistent(row);
}
session.currentTransaction().commit();
session.close();
System.out.println("Test data created.");
}
private void runTest(){
long startTime = System.currentTimeMillis();
int round =0;
while(true){
Session session = getSession();
for(int i=1;i<num_rows; i++){
session.currentTransaction().begin();
session.setLockMode(LockMode.EXCLUSIVE);
QueryBuilder qb = session.getQueryBuilder();
QueryDomainType<Table.SIGTable> dobj = qb.createQueryDefinition(
Table.SIGTable.class);
Predicate pred1 = dobj.get("col1").equal(dobj.param("param"));
dobj.where(pred1);
Query<Table.SIGTable> query = session.createQuery(dobj);
query.setParameter("param", i);
List<Table.SIGTable> results = query.getResultList();
session.currentTransaction().commit();
}
round++;
System.out.println("Round " + round + " ---- Total Time Taken = " + (
(System.currentTimeMillis() - startTime)/1000) + " seconds");
}
}
private void deleteAllData() throws Exception {
Session session = sf.getSession();
session.deletePersistentAll(Table.SIGTable.class);
session.close();
}
private void parseArgs(String[] args) {
CmdLineParser parser = new CmdLineParser(this);
parser.setUsageWidth(80);
try {
parser.parseArgument(args);
} catch (Exception e) {
System.err.println(e.getMessage());
parser.printUsage(System.err);
System.err.println("Make Sure to create a SIGTable for test\n " + Table
.CREATE_SIGTABLE);
System.err.println();
System.exit(-1);
}
if (help) {
parser.printUsage(System.err);
System.err.println("Make Sure to create a SIGTable for test\n " + Table
.CREATE_SIGTABLE);
System.exit(0);
}
}
private void createSessions(){
simpleSessionPool = new Session[num_sessions];
for(int s=0; s<num_sessions; s++){
simpleSessionPool[s] = sf.getSession();
}
}
private Session getSession(){
return simpleSessionPool[rand.nextInt(num_sessions)];
}
public void start(String[] argv) throws Exception {
parseArgs(argv);
setUpDBConnection();
deleteAllData();
populateDB();
runTest();
}
}