MySQL Bugs: #32944: Race condition while doing RESET QUERY CACHE

Bug #32944	Race condition while doing RESET QUERY CACHE
Submitted:	3 Dec 2007 19:53	Modified:	7 Jul 2017 9:43
Reporter:	Kristofer Pettersson	Email Updates:
Status:	Won't fix	Impact on me:	None
Category:	MySQL Server: Query Cache	Severity:	S3 (Non-critical)
Version:	5.1	OS:	Any
Assigned to:	Assigned Account	CPU Architecture:	Any

Description:
The function flush_cache, which is called to execute the statement RESET QUERY CACHE, has a race condition which in some rare cases leads to an assignment to unallocated memory.

The race doesn't lead to a crash, but writing to unallocated memory is a bad idea.

How to repeat:
Add two debug hooks:
<snip>
void query_cache_insert(NET *net, const char *packet, ulong length)
{
  DBUG_ENTER("query_cache_insert");

  /* See the comment on double-check locking usage above. */
  if (net->query_cache_query == 0)
    DBUG_VOID_RETURN;
  DBUG_EXECUTE_IF("wait_in_query_cache_insert2",
                  static bool exec_once= FALSE;
                  if (!exec_once)
                    debug_wait_for_kill("wait_in_query_cache_insert2");
                  exec_once= TRUE; );

  STRUCT_LOCK(&query_cache.structure_guard_mutex);
<snip>

And in void query_cache_insert(NET *net, const char *packet, ulong length)

<snip>
  /* NOTE: structure guard mutex has been released. */
  DBUG_EXECUTE_IF("wait_in_query_cache_insert",
                  debug_wait_for_kill("wait_in_query_cache_insert"); );
  header->result(result);
  header->last_pkt_nr= net->pkt_nr;
  BLOCK_UNLOCK_WR(query_block);
  DBUG_EXECUTE("check_querycache",query_cache.check_integrity(0););

  DBUG_VOID_RETURN;
}
<snip>

Execute this test case:

flush status;
set query_cache_type=DEMAND;
set global query_cache_size= 1024*1024*512;
drop table if exists t1;
create table t1 (a varchar(100));
insert into t1 values ('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa');
grant all on test.* to `bug30877`@`localhost`;
connect (select, localhost, root, ,test);
connect (flush, localhost, root, ,test);

connection select;
--echo Start a cachable select statement..
set session debug='+d,wait_in_query_cache_insert2';
--send select SQL_CACHE * from t1;

connection default;
--echo ..and wait for it to be caught by the debug hook.
let $wait_condition= select count(*)= 1 from information_schema.processlist where state= 'wait_in_query_cache_insert2';
--source include/wait_condition.inc

connection flush;
--echo On a second connection; reset the query cache...
show status like 'Qcache_queries_in_cache';
set session debug='+d,wait_in_query_cache_flush';
--send reset query cache;

connection default;
--echo ..and wait for the second debug hook.
let $wait_condition= select count(*)= 1 from information_schema.processlist where state= 'wait_in_query_cache_flush';
--source include/wait_condition.inc

--echo Both the select-,and reset-threads are now caught by debug hooks.
select id,state from information_schema.processlist where state like '%query_cache%';
--echo First thread is waiting to insert result blocks into the query cache.
--echo Second thread has set the state FLUSH_IN_PROGRESS but has not yet begun invalidating.

--echo Signal the first debug hook to release the lock on the select statement.
select id from information_schema.processlist where state='wait_in_query_cache_insert2' into @thread_id;
kill query @thread_id;

--echo The writer thread discovers that the query cache is in the flush-state and aborts.
show status like 'Qcache_queries_in_cache';
connection select;
--reap

--echo Signal the second debug hook to release the lock on the reset statement.
select id from information_schema.processlist where state='wait_in_query_cache_flush' into @thread_id;
kill query @thread_id;

--echo The second thread proceeds with clearing the cache and will through free_query_internal reference memory which isn't suppose to be valid any more since the associated session died.

connection flush;
--reap
connection default;
use test;
drop table t1;

Suggested fix:
Supply a test case which can make a difference between success and failure. :)

It is possibly enough to patch query_cache_insert to this:

<snip>
  STRUCT_LOCK(&query_cache.structure_guard_mutex);
  bool interrupt;
  query_cache.wait_while_table_flush_is_in_progress(&interrupt);
  Query_cache_block *query_block= (Query_cache_block*)net->query_cache_query;

  if (!query_block)
  {
    /*
      We lost the writer and the currently processed query has been
      invalidated; there is nothing left to do.
    */
    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
    DBUG_VOID_RETURN;
  }
  Query_cache_query *header= query_block->query();
  BLOCK_LOCK_WR(query_block);

  /*
    If the entire cache is going to be cleared there is no point in
    continuing.
  */
  if (interrupt)
  {
    /*
       Drop the writer.
    */
    if (header->writer() != NULL)
    {
      header->writer(NULL);
      header->writer()->query_cache_query= NULL;
    }
    STRUCT_UNLOCK(&query_cache.structure_guard_mutex);
    DBUG_VOID_RETURN;
  }

  Query_cache_block *result= header->result();
<snip>

MySQL will no longer invest in the query cache, see:

http://mysqlserverteam.com/mysql-8-0-retiring-support-for-the-query-cache/