From 734ebc283bcc05cc2377860bfa23a431322614d5 Mon Sep 17 00:00:00 2001
From: hopebo <hopelee1994@gmail.com>
Date: Thu, 3 Dec 2020 18:20:21 +0800
Subject: [PATCH] Bugfix SELECT DISTINCT returns wrong results when JOIN
 enumeration shortcut

Description
-----------
Originally, for a SELECT DISTINCT query, the executor could abort
joining more records in current table on which the select fields' list
doesn't depend if at least one record has been found.

But this optimization doesn't consider the scenario of JOIN cache used
after this table. Since that, the found records may come from the old
records in the cache instead of the current being joined record.

Fix
---
If there are any JOIN cache used after the table, we should forbit
this optimization of shortcutting JOIN enumeration.
---
 sql/sql_executor.cc | 12 +++++++-----
 sql/sql_executor.h  | 13 +++++++++++--
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/sql/sql_executor.cc b/sql/sql_executor.cc
index 92df4078385..d74c257ec09 100644
--- a/sql/sql_executor.cc
+++ b/sql/sql_executor.cc
@@ -520,7 +520,8 @@ void JOIN::optimize_distinct() {
   for (int i = primary_tables - 1; i >= 0; --i) {
     QEP_TAB *last_tab = qep_tab + i;
     if (select_lex->select_list_tables & last_tab->table_ref->map()) break;
-    last_tab->not_used_in_distinct = true;
+    last_tab->not_used_in_distinct_and_no_join_cache_after = true;
+    if (best_ref[i]->use_join_cache() != JOIN_CACHE::ALG_NONE) break;
   }
 
   /* Optimize "select distinct b from t1 order by key_part_1 limit #" */
@@ -1701,7 +1702,6 @@ static int do_sj_reset(SJ_TMP_TABLE *sj_tbl) {
 
 static enum_nested_loop_state evaluate_join_record(JOIN *join,
                                                    QEP_TAB *const qep_tab) {
-  bool not_used_in_distinct = qep_tab->not_used_in_distinct;
   ha_rows found_records = join->found_records;
   Item *condition = qep_tab->condition();
   const plan_idx qep_tab_idx = qep_tab->idx();
@@ -1876,10 +1876,12 @@ static enum_nested_loop_state evaluate_join_record(JOIN *join,
 
       /*
         Test if this was a SELECT DISTINCT query on a table that
-        was not in the field list;  In this case we can abort if
-        we found a row, as no new rows can be added to the result.
+        was not in the field list and no JOIN CACHE was used after
+        this table;  In this case we can abort if we found a row,
+        as no new rows can be added to the result.
       */
-      if (not_used_in_distinct && found_records != join->found_records)
+      if (qep_tab->not_used_in_distinct_and_no_join_cache_after &&
+          found_records != join->found_records)
         set_if_smaller(return_tab, qep_tab_idx - 1);
 
       set_if_smaller(join->return_tab, return_tab);
diff --git a/sql/sql_executor.h b/sql/sql_executor.h
index b0c635ba2b8..83633092dbb 100644
--- a/sql/sql_executor.h
+++ b/sql/sql_executor.h
@@ -415,7 +415,7 @@ class QEP_TAB : public QEP_shared_owner {
         used_uneven_bit_fields(false),
         keep_current_rowid(false),
         copy_current_rowid(NULL),
-        not_used_in_distinct(false),
+        not_used_in_distinct_and_no_join_cache_after(false),
         cache_idx_cond(NULL),
         having(NULL),
         op(NULL),
@@ -602,7 +602,16 @@ class QEP_TAB : public QEP_shared_owner {
   /** true <=> remove duplicates on this table. */
   bool needs_duplicate_removal = false;
 
-  bool not_used_in_distinct;
+  /**
+    Used to shortcut join enumeration when this is a SELECT_DISTINCT query that
+    the select fields' list doesn't depend on some tables. If we have found one
+    record, we can skip the unfinished join process.
+
+    But if there is JOIN CACHE used after this table, we cannot apply this
+    optimization because the found records may come from the old records in the
+    cache instead of the current being joined record.
+  */
+  bool not_used_in_distinct_and_no_join_cache_after;
 
   /// Index condition for BKA access join
   Item *cache_idx_cond;
-- 
2.23.0