Description:
The function flush_cache, which is called to execute the statement RESET QUERY CACHE, has a race condition which in some rare cases leads to a write to unallocated memory.
The race doesn't lead to a crash, but writing to unallocated memory is a bad idea.
How to repeat:
Add two debug hooks:
<snip>
void query_cache_insert(NET *net, const char *packet, ulong length)
{
DBUG_ENTER("query_cache_insert");
/* See the comment on double-check locking usage above. */
if (net->query_cache_query == 0)
DBUG_VOID_RETURN;
DBUG_EXECUTE_IF("wait_in_query_cache_insert2",
static bool exec_once= FALSE;
if (!exec_once)
debug_wait_for_kill("wait_in_query_cache_insert2");
exec_once= TRUE; );
STRUCT_LOCK(&query_cache.structure_guard_mutex);
<snip>
And in void query_cache_insert(NET *net, const char *packet, ulong length)
<snip>
/* NOTE: structure guard mutex has been released. */
DBUG_EXECUTE_IF("wait_in_query_cache_insert",
debug_wait_for_kill("wait_in_query_cache_insert"); );
header->result(result);
header->last_pkt_nr= net->pkt_nr;
BLOCK_UNLOCK_WR(query_block);
DBUG_EXECUTE("check_querycache",query_cache.check_integrity(0););
DBUG_VOID_RETURN;
}
<snip>
Execute this test case:
flush status;
set query_cache_type=DEMAND;
set global query_cache_size= 1024*1024*512;
drop table if exists t1;
create table t1 (a varchar(100));
insert into t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
grant all on test.* to `bug30877`@`localhost`;
connect (select, localhost, root, ,test);
connect (flush, localhost, root, ,test);
connection select;
--echo Start a cachable select statement..
set session debug='+d,wait_in_query_cache_insert2';
--send select SQL_CACHE * from t1;
connection default;
--echo ..and wait for it to be caught by the debug hook.
let $wait_condition= select count(*)= 1 from information_schema.processlist where state= 'wait_in_query_cache_insert2';
--source include/wait_condition.inc
connection flush;
--echo On a second connection; reset the query cache...
show status like 'Qcache_queries_in_cache';
set session debug='+d,wait_in_query_cache_flush';
--send reset query cache;
connection default;
--echo ..and wait for the second debug hook.
let $wait_condition= select count(*)= 1 from information_schema.processlist where state= 'wait_in_query_cache_flush';
--source include/wait_condition.inc
--echo Both the select-,and reset-threads are now caught by debug hooks.
select id,state from information_schema.processlist where state like '%query_cache%';
--echo First thread is waiting to insert result blocks into the query cache.
--echo Second thread has set the state FLUSH_IN_PROGRESS but has not yet begun invalidating.
--echo Signal the first debug hook to release the lock on the select statement.
select id from information_schema.processlist where state='wait_in_query_cache_insert2' into @thread_id;
kill query @thread_id;
--echo The writer thread discovers that the query cache is in the flush-state and aborts.
show status like 'Qcache_queries_in_cache';
connection select;
--reap
--echo Signal the second debug hook to release the lock on the reset statement.
select id from information_schema.processlist where state='wait_in_query_cache_flush' into @thread_id;
kill query @thread_id;
--echo The second thread proceeds with clearing the cache and will through free_query_internal reference memory which isn't suppose to be valid any more since the associated session died.
connection flush;
--reap
connection default;
use test;
drop table t1;
Suggested fix:
Supply a test case which can distinguish between success and failure. :)
It is possibly enough to patch query_cache_insert to this:
<snip>
STRUCT_LOCK(&query_cache.structure_guard_mutex);
bool interrupt;
query_cache.wait_while_table_flush_is_in_progress(&interrupt);
Query_cache_block *query_block= (Query_cache_block*)net->query_cache_query;
if (!query_block)
{
/*
We lost the writer and the currently processed query has been
invalidated; there is nothing left to do.
*/
STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
DBUG_VOID_RETURN;
}
Query_cache_query *header= query_block->query();
BLOCK_LOCK_WR(query_block);
/*
If the entire cache is going to be cleared there is no point in
continuing.
*/
if (interrupt)
{
/*
Drop the writer.
*/
if (header->writer() != NULL)
{
header->writer(NULL);
header->writer()->query_cache_query= NULL;
}
STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
DBUG_VOID_RETURN;
}
Query_cache_block *result= header->result();
<snip>
Description: The function flush_cache, which is called to execute the statement RESET QUERY CACHE has a race condition in which in some rare cases leads to assignment of unallocated memory. The race doesn't lead to a crash, but writing to uncharted memory is a bad idea. How to repeat: Add two debug hooks: <snip> void query_cache_insert(NET *net, const char *packet, ulong length) { DBUG_ENTER("query_cache_insert"); /* See the comment on double-check locking usage above. */ if (net->query_cache_query == 0) DBUG_VOID_RETURN; DBUG_EXECUTE_IF("wait_in_query_cache_insert2", static bool exec_once= FALSE; if (!exec_once) debug_wait_for_kill("wait_in_query_cache_insert2"); exec_once= TRUE; ); STRUCT_LOCK(&query_cache.structure_guard_mutex); <snip> And in void query_cache_insert(NET *net, const char *packet, ulong length) <snip> /* NOTE: structure guard mutex has been released. */ DBUG_EXECUTE_IF("wait_in_query_cache_insert", debug_wait_for_kill("wait_in_query_cache_insert"); ); header->result(result); header->last_pkt_nr= net->pkt_nr; BLOCK_UNLOCK_WR(query_block); DBUG_EXECUTE("check_querycache",query_cache.check_integrity(0);); DBUG_VOID_RETURN; } <snip> Execute this test case: flush status; set query_cache_type=DEMAND; set global query_cache_size= 1024*1024*512; drop table if exists t1; create table t1 (a varchar(100)); insert into t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'); grant all on test.* to `bug30877`@`localhost`; connect (select, localhost, root, ,test); connect (flush, localhost, root, ,test); connection select; --echo Start a cachable select statement.. set session debug='+d,wait_in_query_cache_insert2'; --send select SQL_CACHE * from t1; connection default; --echo ..and wait for it to be caught by the debug hook. 
let $wait_condition= select count(*)= 1 from information_schema.processlist where state= 'wait_in_query_cache_insert2'; --source include/wait_condition.inc connection flush; --echo On a second connection; reset the query cache... show status like 'Qcache_queries_in_cache'; set session debug='+d,wait_in_query_cache_flush'; --send reset query cache; connection default; --echo ..and wait for the second debug hook. let $wait_condition= select count(*)= 1 from information_schema.processlist where state= 'wait_in_query_cache_flush'; --source include/wait_condition.inc --echo Both the select-,and reset-threads are now caught by debug hooks. select id,state from information_schema.processlist where state like '%query_cache%'; --echo First thread is waiting to insert result blocks into the query cache. --echo Second thread has set the state FLUSH_IN_PROGRESS but has not yet begun invalidating. --echo Signal the first debug hook to release the lock on the select statement. select id from information_schema.processlist where state='wait_in_query_cache_insert2' into @thread_id; kill query @thread_id; --echo The writer thread discovers that the query cache is in the flush-state and aborts. show status like 'Qcache_queries_in_cache'; connection select; --reap --echo Signal the second debug hook to release the lock on the reset statement. select id from information_schema.processlist where state='wait_in_query_cache_flush' into @thread_id; kill query @thread_id; --echo The second thread proceeds with clearing the cache and will through free_query_internal reference memory which isn't suppose to be valid any more since the associated session died. connection flush; --reap connection default; use test; drop table t1; Suggested fix: Supply a test case which can make a difference between success and failure. 
:) It is possibly enough to patch query_cache_insert to this: <snip> STRUCT_LOCK(&query_cache.structure_guard_mutex); bool interrupt; query_cache.wait_while_table_flush_is_in_progress(&interrupt); Query_cache_block *query_block= (Query_cache_block*)net->query_cache_query; if (!query_block) { /* We lost the writer and the currently processed query has been invalidated; there is nothing left to do. */ STRUCT_UNLOCK(&query_cache.structure_guard_mutex); DBUG_VOID_RETURN; } Query_cache_query *header= query_block->query(); BLOCK_LOCK_WR(query_block); /* If the entire cache is going to be cleared there is no point in continuing. */ if (interrupt) { /* Drop the writer. */ if (header->writer() != NULL) { header->writer(NULL); header->writer()->query_cache_query= NULL; } STRUCT_UNLOCK(&query_cache.structure_guard_mutex); DBUG_VOID_RETURN; } Query_cache_block *result= header->result(); <snip>