diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/include/my_base.h mysql-8.0.3-rc/include/my_base.h --- mysql-8.0.3-rc/include/my_base.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/include/my_base.h 2020-04-19 23:52:40.166543813 +0800 @@ -992,6 +992,9 @@ is the global server default. */ #define HA_ERR_NO_WAIT_LOCK 203 /* Don't wait for record lock */ #define HA_ERR_LAST 203 /* Copy of last error nr */ +/* Flashback */ +#define HA_END_OF_ORIG_SCAN -200 /* End flag of the scan for original table */ + /* Number of different errors */ #define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1) diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/include/mysql/plugin_audit.h.pp mysql-8.0.3-rc/include/mysql/plugin_audit.h.pp --- mysql-8.0.3-rc/include/mysql/plugin_audit.h.pp 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/include/mysql/plugin_audit.h.pp 2020-04-19 23:52:40.162543813 +0800 @@ -332,6 +332,9 @@ enum enum_sql_command { SQLCOM_CLONE, SQLCOM_LOCK_INSTANCE, SQLCOM_UNLOCK_INSTANCE, + SQLCOM_TRXTOTIME, + SQLCOM_TIMETOTRX, + SQLCOM_CURRENTTRX, SQLCOM_END }; typedef enum diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/include/mysql/plugin.h mysql-8.0.3-rc/include/mysql/plugin.h --- mysql-8.0.3-rc/include/mysql/plugin.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/include/mysql/plugin.h 2020-04-19 23:52:40.162543813 +0800 @@ -58,9 +58,13 @@ #ifdef __cplusplus class THD; class Item; +/* Flashback */ +class Temporal_hint; #define MYSQL_THD THD* +#define MYSQL_TEMPORAL_HINT Temporal_hint* #else #define MYSQL_THD void* +#define MYSQL_TEMPORAL_HINT void* #endif typedef void * MYSQL_PLUGIN; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/include/my_sqlcommand.h mysql-8.0.3-rc/include/my_sqlcommand.h --- mysql-8.0.3-rc/include/my_sqlcommand.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/include/my_sqlcommand.h 2020-04-19 
23:52:40.162543813 +0800 @@ -192,6 +192,9 @@ enum enum_sql_command { SQLCOM_CLONE, SQLCOM_LOCK_INSTANCE, SQLCOM_UNLOCK_INSTANCE, + SQLCOM_TRXTOTIME, /* Flashback */ + SQLCOM_TIMETOTRX, /* Flashback */ + SQLCOM_CURRENTTRX, /* Flashback */ /* This should be the last !!! */ SQLCOM_END }; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/share/errmsg-utf8.txt mysql-8.0.3-rc/share/errmsg-utf8.txt --- mysql-8.0.3-rc/share/errmsg-utf8.txt 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/share/errmsg-utf8.txt 2020-04-19 23:52:39.578543835 +0800 @@ -10904,6 +10904,27 @@ ER_RESOURCE_GROUP_BIND_FAILED ER_INVALID_USE_OF_FORCE_OPTION eng "Option FORCE invalid as DISABLE option is not specified." +ER_TEMPORARY_TEMPORAL_CONFLICT + eng "Temporal table can not be temporary." + +ER_CREATE_TEMPORAL_NOT_SUPPORTED + eng "Can not create temporal table in the way of \"(%s)\"." + +ER_ONLY_SELECT_SUPPORTS_TEMPORAL + eng "Only select statement supports temporal features." + +ER_ONLY_ORIG_TABLE_SUPPORTS_TEMPORAL + eng "Only original table supports temporal features." + +ER_HIST_TABLE_NOT_ACCESSIBLE_DIRECTLY + eng "Can not access history table \"(%s)\" directly." + +ER_HIST_TABLE_DELETE_NOT_PERMITTED + eng "History can only be deleted by super user, under admin connection." + +ER_ALTER_DEFAULT_HISTORY_TABLESPACE + eng "InnoDB: `%s` is a reserved tablespace name." + # # End of 8.0 error messages. 
# diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/dd_table.cc mysql-8.0.3-rc/sql/dd/dd_table.cc --- mysql-8.0.3-rc/sql/dd/dd_table.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/dd_table.cc 2020-04-19 23:52:39.622543834 +0800 @@ -2298,6 +2298,12 @@ static bool fill_dd_table_from_create_in thd->work_part_info)) return true; + /* Flashback */ + if (create_info->options & HA_LEX_CREATING_FLASHBACK_TABLE) + tab_obj->set_hist_tbl(true); + else if (create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE) + tab_obj->set_orig_tbl(true); + return false; } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/impl/system_views/tables.cc mysql-8.0.3-rc/sql/dd/impl/system_views/tables.cc --- mysql-8.0.3-rc/sql/dd/impl/system_views/tables.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/impl/system_views/tables.cc 2020-04-19 23:52:39.626543833 +0800 @@ -54,7 +54,12 @@ Tables_base::Tables_base() m_target_def.add_field(FIELD_TABLE_COMMENT,"TABLE_COMMENT", "INTERNAL_GET_COMMENT_OR_ERROR(sch.name, tbl.name, tbl.type, " "tbl.options, tbl.comment)"); - + /* Flashback */ + m_target_def.add_field(FIELD_HIST_TABLE, "HIST_TABLE", + "IF(tbl.is_history_table > 0, 'YES', 'NO')"); + m_target_def.add_field(FIELD_ORIG_TABLE, "ORIG_TABLE", + "IF(tbl.has_history_table > 0, 'YES', 'NO')"); + m_target_def.add_from("mysql.tables tbl"); m_target_def.add_from("JOIN mysql.schemata sch ON tbl.schema_id=sch.id"); m_target_def.add_from("JOIN mysql.catalogs cat ON " "cat.id=sch.catalog_id"); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/impl/system_views/tables.h mysql-8.0.3-rc/sql/dd/impl/system_views/tables.h --- mysql-8.0.3-rc/sql/dd/impl/system_views/tables.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/impl/system_views/tables.h 2020-04-19 23:52:39.626543833 +0800 @@ -51,7 +51,10 @@ public: FIELD_TABLE_COLLATION, FIELD_CHECKSUM, 
FIELD_CREATE_OPTIONS, - FIELD_TABLE_COMMENT + FIELD_TABLE_COMMENT, + /* Flashback */ + FIELD_HIST_TABLE, //Whether this table is a history table. + FIELD_ORIG_TABLE //Whether this table has a corresponding history table. }; Tables_base(); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/impl/tables/tables.cc mysql-8.0.3-rc/sql/dd/impl/tables/tables.cc --- mysql-8.0.3-rc/sql/dd/impl/tables/tables.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/impl/tables/tables.cc 2020-04-19 23:52:39.626543833 +0800 @@ -169,6 +169,13 @@ Tables::Tables() m_target_def.add_field(FIELD_VIEW_COLUMN_NAMES, "FIELD_VIEW_COLUMN_NAMES", "view_column_names LONGTEXT"); + /* Flashback */ + m_target_def.add_field(FIELD_HIST_TABLE, + "FIELD_HIST_TABLE", + "is_history_table INT"); + m_target_def.add_field(FIELD_ORIG_TABLE, + "FIELD_ORIG_TABLE", + "has_history_table INT"); m_target_def.add_index("PRIMARY KEY (id)"); m_target_def.add_index("UNIQUE KEY (schema_id, name)"); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/impl/tables/tables.h mysql-8.0.3-rc/sql/dd/impl/tables/tables.h --- mysql-8.0.3-rc/sql/dd/impl/tables/tables.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/impl/tables/tables.h 2020-04-19 23:52:39.630543833 +0800 @@ -83,7 +83,10 @@ public: FIELD_VIEW_DEFINER, FIELD_VIEW_CLIENT_COLLATION_ID, FIELD_VIEW_CONNECTION_COLLATION_ID, - FIELD_VIEW_COLUMN_NAMES + FIELD_VIEW_COLUMN_NAMES, + /* Flashback */ + FIELD_HIST_TABLE, //Whether this is a history table. + FIELD_ORIG_TABLE //Whether this is an original table. 
}; public: diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/impl/types/abstract_table_impl.cc mysql-8.0.3-rc/sql/dd/impl/types/abstract_table_impl.cc --- mysql-8.0.3-rc/sql/dd/impl/types/abstract_table_impl.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/impl/types/abstract_table_impl.cc 2020-04-19 23:52:39.622543834 +0800 @@ -79,6 +79,8 @@ Abstract_table_impl::Abstract_table_impl m_columns(), m_schema_id(INVALID_OBJECT_ID) { + set_hist_tbl(false); + set_orig_tbl(false); } /////////////////////////////////////////////////////////////////////////// @@ -156,6 +158,10 @@ bool Abstract_table_impl::restore_attrib set_options_raw(r.read_str(Tables::FIELD_OPTIONS, "")); + /* Flashback */ + set_hist_tbl(static_cast(r.read_int(Tables::FIELD_HIST_TABLE))); + set_orig_tbl(static_cast(r.read_int(Tables::FIELD_ORIG_TABLE))); + return false; } @@ -190,7 +196,10 @@ bool Abstract_table_impl::store_attribut r->store(Tables::FIELD_OPTIONS, *m_options) || r->store(Tables::FIELD_CREATED, m_created) || r->store(Tables::FIELD_LAST_ALTERED, m_last_altered) || - r->store(Tables::FIELD_HIDDEN, static_cast(m_hidden)); + r->store(Tables::FIELD_HIDDEN, static_cast(m_hidden)) || + r->store(Tables::FIELD_HIST_TABLE, static_cast(get_hist_tbl())) || + r->store(Tables::FIELD_ORIG_TABLE, static_cast(get_orig_tbl())); + } /////////////////////////////////////////////////////////////////////////// @@ -369,5 +378,7 @@ Abstract_table_impl::Abstract_table_impl m_schema_id(src.m_schema_id) { m_columns.deep_copy(src.m_columns, this); + set_hist_tbl(src.get_hist_tbl()); + set_orig_tbl(src.get_orig_tbl()); } } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/types/abstract_table.h mysql-8.0.3-rc/sql/dd/types/abstract_table.h --- mysql-8.0.3-rc/sql/dd/types/abstract_table.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/types/abstract_table.h 2020-04-19 23:52:39.618543834 +0800 @@ -179,6 +179,32 @@ public: 
@return pointer to dynamically allocated copy */ virtual Abstract_table *clone() const = 0; + +/* Flashback */ +private: + bool hist_tbl; + bool orig_tbl; + +public: + void set_hist_tbl(bool hist_tbl) + { + this->hist_tbl= hist_tbl; + } + + bool get_hist_tbl() const + { + return hist_tbl; + } + + void set_orig_tbl(bool orig_tbl) + { + this->orig_tbl= orig_tbl; + } + + bool get_orig_tbl() const + { + return orig_tbl; + } }; /////////////////////////////////////////////////////////////////////////// diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/dd/types/partition.h mysql-8.0.3-rc/sql/dd/types/partition.h --- mysql-8.0.3-rc/sql/dd/types/partition.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/dd/types/partition.h 2020-04-19 23:52:39.618543834 +0800 @@ -196,6 +196,11 @@ public: */ virtual bool deserialize(Sdi_rcontext *rctx, const RJ_Value &val) = 0; + +/* Flashback: clear compile error */ +public: + virtual bool get_orig_tbl() const { return false; } + virtual bool get_hist_tbl() const { return false; } }; /////////////////////////////////////////////////////////////////////////// diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/filesort.cc mysql-8.0.3-rc/sql/filesort.cc --- mysql-8.0.3-rc/sql/filesort.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/filesort.cc 2020-05-03 10:28:27.576776539 +0800 @@ -1148,7 +1148,12 @@ static ha_rows find_all_keys(THD *thd, S if (quick_select) { if ((error= qep_tab->quick()->get_next())) - break; + { + if (error == HA_END_OF_ORIG_SCAN) /* Flashback */ + continue; + else + break; + } file->position(sort_form->record[0]); DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE);); } @@ -1166,7 +1171,13 @@ static ha_rows find_all_keys(THD *thd, S file->position(sort_form->record[0]); } if (error && error != HA_ERR_RECORD_DELETED) - break; + { + if (error == HA_END_OF_ORIG_SCAN) /* Flashback */ + continue; + else + break; + } + } 
if (*killed) diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/gen_lex_token.cc mysql-8.0.3-rc/sql/gen_lex_token.cc --- mysql-8.0.3-rc/sql/gen_lex_token.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/gen_lex_token.cc 2020-04-19 23:52:39.598543834 +0800 @@ -209,6 +209,9 @@ static void compute_tokens() set_token(JSON_SEPARATOR_SYM, "->"); set_token(JSON_UNQUOTED_SEPARATOR_SYM, "->>"); + /* Flashback */ + set_token(WITH_TEMPORAL_SYM, "WITH TEMPORAL"); + /* Values. These tokens are all normalized later, diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/handler.cc mysql-8.0.3-rc/sql/handler.cc --- mysql-8.0.3-rc/sql/handler.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/handler.cc 2020-04-19 23:52:39.606543834 +0800 @@ -6708,7 +6708,8 @@ int handler::multi_range_read_next(char index. */ if (!((mrr_cur_range.range_flag & UNIQUE_RANGE) && - (mrr_cur_range.range_flag & EQ_RANGE))) + (mrr_cur_range.range_flag & EQ_RANGE)) || + (table->pos_in_table_list && table->pos_in_table_list->hist_tbl)) /* Flashback: history tables also call read_range_next(); skip the test when the TABLE has no TABLE_LIST */ { result= read_range_next(); /* On success or non-EOF errors jump to the end. */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/handler.h mysql-8.0.3-rc/sql/handler.h --- mysql-8.0.3-rc/sql/handler.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/handler.h 2020-05-03 10:30:11.460772544 +0800 @@ -574,6 +574,8 @@ enum enum_alter_inplace_result { #define HA_LEX_CREATE_IF_NOT_EXISTS 2 #define HA_LEX_CREATE_TABLE_LIKE 4 #define HA_LEX_CREATE_INTERNAL_TMP_TABLE 8 +#define HA_LEX_CREATE_FLASHBACK_TABLE 16 /* Flashback */ +#define HA_LEX_CREATING_FLASHBACK_TABLE 32 /* Flashback */ #define HA_MAX_REC_LENGTH 65535U /** @@ -707,6 +709,9 @@ given at all. */ /** COMPRESSION="zlib|lz4|none" used during table create. */ #define HA_CREATE_USED_COMPRESS (1L << 26) +/** Flashback */ +#define HA_CREATE_USED_HIST_TABLESPACE (1L << 27) + /* Structure to hold list of database_name.table_name. 
This is used at both mysqld and storage engine layer. @@ -2081,7 +2086,8 @@ typedef struct st_ha_create_information } const CHARSET_INFO *table_charset, *default_table_charset; LEX_STRING connect_string; - const char *password, *tablespace; + /* Flashback: add hist_tablespace into HA_CREATE_INFO */ + const char *password, *tablespace, *hist_tablespace; LEX_STRING comment; /** @@ -3061,6 +3067,14 @@ public: } }; +/** Status of transaction. */ +enum tlog_trx_status : unsigned char +{ + TLOG_TRX_STATUS_NOT_STARTED = 0x00, + TLOG_TRX_STATUS_IN_PROGRESS = 0x01, + TLOG_TRX_STATUS_ABORT = 0x02, + TLOG_TRX_STATUS_COMMIT = 0x03 +}; /** The handler class is the interface for dynamically loadable @@ -5720,8 +5734,30 @@ protected: void unlock_shared_ha_data(); friend class DsMrr_impl; -}; +/* Flashback */ +public: + typedef struct + { + tlog_trx_status status; + timeval start_time; + timeval finish_time; + } tlog_t; + + typedef std::pair tlog_info; + + virtual int trxtotime(THD* thd MY_ATTRIBUTE((unused)), + std::vector* ans + MY_ATTRIBUTE((unused))) { return 0; } + + virtual int timetotrx(THD* thd MY_ATTRIBUTE((unused)), + std::vector* ans + MY_ATTRIBUTE((unused))) { return 0; } + + virtual int currenttrx(THD* thd MY_ATTRIBUTE((unused)), + ulonglong* current_trx_id + MY_ATTRIBUTE((unused))) { return 0; } +}; /** Function identifies any old data type present in table. 
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/lex.h mysql-8.0.3-rc/sql/lex.h --- mysql-8.0.3-rc/sql/lex.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/lex.h 2020-04-19 23:52:39.586543835 +0800 @@ -724,6 +724,9 @@ static const SYMBOL symbols[] = { { SYM("YEAR_MONTH", YEAR_MONTH_SYM)}, { SYM("ZEROFILL", ZEROFILL_SYM)}, { SYM("||", OR_OR_SYM)}, + { SYM("TRXTOTIME", TRXTOTIME_SYM)}, /* Flashback */ + { SYM("TIMETOTRX", TIMETOTRX_SYM)}, /* Flashback */ + { SYM("CURRENTTRX", CURRENTTRX_SYM)}, /* Flashback */ /* Place keywords that accept optimizer hints below this comment. */ @@ -799,7 +802,10 @@ static const SYMBOL symbols[] = { { SYM_H("JOIN_FIXED_ORDER", JOIN_FIXED_ORDER_HINT)}, { SYM_H("INDEX_MERGE", INDEX_MERGE_HINT)}, { SYM_H("NO_INDEX_MERGE", NO_INDEX_MERGE_HINT)}, - { SYM_H("RESOURCE_GROUP", RESOURCE_GROUP_HINT)} + { SYM_H("RESOURCE_GROUP", RESOURCE_GROUP_HINT)}, + /* Flashback */ + { SYM("TEMPORAL", TEMPORAL_SYM)}, + { SYM("HIST_TABLESPACE", HIST_TABLESPACE_SYM)} }; #endif /* LEX_INCLUDED */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/mdl.cc mysql-8.0.3-rc/sql/mdl.cc --- mysql-8.0.3-rc/sql/mdl.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/mdl.cc 2020-04-19 23:52:39.610543834 +0800 @@ -126,7 +126,8 @@ PSI_stage_info MDL_key::m_namespace_to_w {0, "Waiting for acl cache lock", 0, PSI_DOCUMENT_ME}, {0, "Waiting for column statistics lock", 0, PSI_DOCUMENT_ME}, {0, "Waiting for backup lock", 0, PSI_DOCUMENT_ME}, - {0, "Waiting for resource groups metadata lock", 0, PSI_DOCUMENT_ME} + {0, "Waiting for resource groups metadata lock", 0, PSI_DOCUMENT_ME}, + {0, "Waiting for flashback lock", 0, PSI_DOCUMENT_ME} }; #ifdef HAVE_PSI_INTERFACE @@ -246,7 +247,8 @@ public: return (mdl_key->mdl_namespace() == MDL_key::GLOBAL || mdl_key->mdl_namespace() == MDL_key::COMMIT || mdl_key->mdl_namespace() == MDL_key::ACL_CACHE || - mdl_key->mdl_namespace() == MDL_key::BACKUP_LOCK); 
+ mdl_key->mdl_namespace() == MDL_key::BACKUP_LOCK || + mdl_key->mdl_namespace() == MDL_key::FLASHBACK); } private: @@ -264,6 +266,8 @@ private: /** Pre-allocated MDL_lock object for BACKUP_LOCK namespace. */ MDL_lock *m_backup_lock; + MDL_lock *m_flashback_lock; + /** Number of unused MDL_lock objects in the server. @@ -1182,11 +1186,13 @@ void MDL_map::init() MDL_key commit_lock_key(MDL_key::COMMIT, "", ""); MDL_key acl_cache_lock_key(MDL_key::ACL_CACHE, "", ""); MDL_key backup_lock_key(MDL_key::BACKUP_LOCK, "", ""); + MDL_key flashback_lock_key(MDL_key::FLASHBACK, "", ""); m_global_lock= MDL_lock::create(&global_lock_key); m_commit_lock= MDL_lock::create(&commit_lock_key); m_acl_cache_lock= MDL_lock::create(&acl_cache_lock_key); m_backup_lock= MDL_lock::create(&backup_lock_key); + m_flashback_lock= MDL_lock::create(&flashback_lock_key); m_unused_lock_objects= 0; @@ -1207,6 +1213,7 @@ void MDL_map::destroy() MDL_lock::destroy(m_commit_lock); MDL_lock::destroy(m_acl_cache_lock); MDL_lock::destroy(m_backup_lock); + MDL_lock::destroy(m_flashback_lock); lf_hash_destroy(&m_locks); } @@ -1259,6 +1266,9 @@ MDL_lock* MDL_map::find(LF_PINS *pins, c case MDL_key::BACKUP_LOCK: lock= m_backup_lock; break; + case MDL_key::FLASHBACK: + lock= m_flashback_lock; + break; default: DBUG_ASSERT(false); } @@ -1707,6 +1717,7 @@ inline void MDL_lock::reinit(const MDL_k case MDL_key::COMMIT: case MDL_key::BACKUP_LOCK: case MDL_key::RESOURCE_GROUPS: + case MDL_key::FLASHBACK: m_strategy= &m_scoped_lock_strategy; break; default: @@ -1745,6 +1756,7 @@ MDL_lock::get_unobtrusive_lock_increment case MDL_key::SCHEMA: case MDL_key::COMMIT: case MDL_key::BACKUP_LOCK: + case MDL_key::FLASHBACK: return m_scoped_lock_strategy.m_unobtrusive_lock_increment[request->type]; default: return m_object_lock_strategy.m_unobtrusive_lock_increment[request->type]; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/mdl.h mysql-8.0.3-rc/sql/mdl.h --- mysql-8.0.3-rc/sql/mdl.h 
2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/mdl.h 2020-04-19 23:52:39.582543835 +0800 @@ -387,6 +387,7 @@ public: COLUMN_STATISTICS, BACKUP_LOCK, RESOURCE_GROUPS, + FLASHBACK, /* This should be the last ! */ NAMESPACE_END }; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/mysqld.cc mysql-8.0.3-rc/sql/mysqld.cc --- mysql-8.0.3-rc/sql/mysqld.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/mysqld.cc 2020-04-19 23:52:39.610543834 +0800 @@ -3156,6 +3156,9 @@ SHOW_VAR com_status_vars[]= { {"xa_recover", (char*) offsetof(System_status_var, com_stat[(uint) SQLCOM_XA_RECOVER]), SHOW_LONG_STATUS, SHOW_SCOPE_ALL}, {"xa_rollback", (char*) offsetof(System_status_var, com_stat[(uint) SQLCOM_XA_ROLLBACK]), SHOW_LONG_STATUS, SHOW_SCOPE_ALL}, {"xa_start", (char*) offsetof(System_status_var, com_stat[(uint) SQLCOM_XA_START]), SHOW_LONG_STATUS, SHOW_SCOPE_ALL}, + {"trx_to_time", (char*) offsetof(System_status_var, com_stat[(uint) SQLCOM_TRXTOTIME]), SHOW_LONG_STATUS, SHOW_SCOPE_ALL}, /* Flashback */ + {"time_to_trx", (char*) offsetof(System_status_var, com_stat[(uint) SQLCOM_TIMETOTRX]), SHOW_LONG_STATUS, SHOW_SCOPE_ALL}, /* Flashback */ + {"current_trx", (char*) offsetof(System_status_var, com_stat[(uint) SQLCOM_CURRENTTRX]), SHOW_LONG_STATUS, SHOW_SCOPE_ALL}, /* Flashback */ {NullS, NullS, SHOW_LONG, SHOW_SCOPE_ALL} }; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/parse_tree_nodes.cc mysql-8.0.3-rc/sql/parse_tree_nodes.cc --- mysql-8.0.3-rc/sql/parse_tree_nodes.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/parse_tree_nodes.cc 2020-05-03 10:31:24.428769738 +0800 @@ -1680,6 +1680,11 @@ Sql_cmd *PT_create_table_stmt::make_cmd( pc2.create_info->options|= HA_LEX_CREATE_TMP_TABLE; if (only_if_not_exists) pc2.create_info->options|= HA_LEX_CREATE_IF_NOT_EXISTS; + if (opt_with_flashback) + pc2.create_info->options|= HA_LEX_CREATE_FLASHBACK_TABLE; + if (pc2.create_info->options 
& HA_LEX_CREATE_TMP_TABLE && + pc2.create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE) + { my_error(ER_TEMPORARY_TEMPORAL_CONFLICT, MYF(0)); return NULL; } /* Flashback: stop building the command once the error is raised */ pc2.create_info->default_table_charset= NULL; @@ -1688,8 +1693,15 @@ Sql_cmd *PT_create_table_stmt::make_cmd( TABLE_LIST *qe_tables= nullptr; + /* CREATE TABLE ... AS SELECT not supported so far */ + if (pc2.create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE && + opt_query_expression) + { my_error(ER_CREATE_TEMPORAL_NOT_SUPPORTED, MYF(0), "CREATE TABLE ... AS SELECT ..."); return NULL; } + if (opt_like_clause != NULL) { + if (pc2.create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE) + { my_error(ER_CREATE_TEMPORAL_NOT_SUPPORTED, MYF(0), "CREATE TABLE ... LIKE ..."); return NULL; } pc2.create_info->options|= HA_LEX_CREATE_TABLE_LIKE; TABLE_LIST **like_clause_table= &lex->query_tables->next_global; TABLE_LIST *src_table= pc.select->add_table_to_list(thd, @@ -1723,6 +1735,8 @@ Sql_cmd *PT_create_table_stmt::make_cmd( if (opt_partitioning) { + if (pc2.create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE) + { my_error(ER_CREATE_TEMPORAL_NOT_SUPPORTED, MYF(0), "CREATE TABLE ... 
PARTITION BY ..."); return NULL; } TABLE_LIST **exclude_part_tables= lex->query_tables_last; if (opt_partitioning->contextualize(&pc)) return NULL; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/parse_tree_nodes.h mysql-8.0.3-rc/sql/parse_tree_nodes.h --- mysql-8.0.3-rc/sql/parse_tree_nodes.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/parse_tree_nodes.h 2020-05-03 10:31:37.688769228 +0800 @@ -545,16 +545,19 @@ class PT_table_factor_table_ident : publ List *opt_use_partition; LEX_STRING *opt_table_alias; List *opt_key_definition; + Temporal_hint *opt_temporal_hint; public: PT_table_factor_table_ident(Table_ident *table_ident_arg, List *opt_use_partition_arg, LEX_STRING *opt_table_alias_arg, - List *opt_key_definition_arg) + List *opt_key_definition_arg, + Temporal_hint *opt_temporal_hint= NULL) : table_ident(table_ident_arg), opt_use_partition(opt_use_partition_arg), opt_table_alias(opt_table_alias_arg), - opt_key_definition(opt_key_definition_arg) + opt_key_definition(opt_key_definition_arg), + opt_temporal_hint(opt_temporal_hint) {} virtual bool contextualize(Parse_context *pc) @@ -574,6 +577,7 @@ public: return true; if (pc->select->add_joined_table(value)) return true; + value->temporal_hint= opt_temporal_hint; return false; } }; @@ -3101,6 +3105,9 @@ typedef PT_traceable_create_table_option HA_CREATE_USED_TABLESPACE> PT_create_tablespace_option; +typedef PT_traceable_create_table_option + PT_create_hist_tablespace_option; typedef PT_traceable_create_table_option @@ -3455,6 +3462,9 @@ class PT_create_table_stmt final : publi HA_CREATE_INFO m_create_info; + /* Flashback */ + bool opt_with_flashback; + public: /** @param mem_root MEM_ROOT to use for allocation @@ -3472,6 +3482,7 @@ public: for @SQL{CREATE TABLE ... SELECT} statements). @param opt_query_expression NULL or the @SQL{@B{SELECT}} clause. + @param opt_with_flashback True if @SQL{CREATE %TABLE ... 
@B{FLASHBACK}} */ PT_create_table_stmt( MEM_ROOT *mem_root, @@ -3482,7 +3493,8 @@ public: const Trivial_array *opt_create_table_options, PT_partition *opt_partitioning, On_duplicate on_duplicate, - PT_query_expression *opt_query_expression) + PT_query_expression *opt_query_expression, + bool opt_with_flashback= false) : PT_table_ddl_stmt_base(mem_root), is_temporary(is_temporary), only_if_not_exists(only_if_not_exists), @@ -3492,7 +3504,8 @@ public: opt_partitioning(opt_partitioning), on_duplicate(on_duplicate), opt_query_expression(opt_query_expression), - opt_like_clause(NULL) + opt_like_clause(NULL), + opt_with_flashback(opt_with_flashback) {} /** @param mem_root MEM_ROOT to use for allocation @@ -3505,7 +3518,8 @@ public: bool is_temporary, bool only_if_not_exists, Table_ident *table_name, - Table_ident *opt_like_clause) + Table_ident *opt_like_clause, + bool opt_with_flashback= false) : PT_table_ddl_stmt_base(mem_root), is_temporary(is_temporary), only_if_not_exists(only_if_not_exists), @@ -3515,7 +3529,8 @@ public: opt_partitioning(NULL), on_duplicate(On_duplicate::ERROR), opt_query_expression(NULL), - opt_like_clause(opt_like_clause) + opt_like_clause(opt_like_clause), + opt_with_flashback(opt_with_flashback) {} Sql_cmd *make_cmd(THD *thd) override; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/records.cc mysql-8.0.3-rc/sql/records.cc --- mysql-8.0.3-rc/sql/records.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/records.cc 2020-05-03 10:32:32.772767110 +0800 @@ -399,6 +399,8 @@ static int rr_handle_error(READ_RECORD * if (error == HA_ERR_END_OF_FILE) error= -1; + else if (error == HA_END_OF_ORIG_SCAN) // Flashback + error= -2; else { if (info->print_error) diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_base.cc mysql-8.0.3-rc/sql/sql_base.cc --- mysql-8.0.3-rc/sql/sql_base.cc 2019-12-05 20:21:35.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_base.cc 2020-04-19 
23:52:39.634543833 +0800 @@ -2964,6 +2964,57 @@ tdc_wait_for_old_version(THD *thd, const } +bool set_and_check_temporal(const THD* thd, TABLE_LIST* table_list, + const dd::Table* table_def, const TABLE* table) +{ + if (table_def) { + /* First time open this table. + Get temporal attribute from dd */ + table_list->hist_tbl = table_def->get_hist_tbl(); + table_list->orig_tbl = table_def->get_orig_tbl(); + } else if (table) { + /* Get table from Table_cache. */ + table_list->hist_tbl = table->hist_tbl; + table_list->orig_tbl = table->orig_tbl; + } else { + /* table_def and table may be both null */ + } + + LEX* lex = thd->lex; + + if (table_list->hist_tbl) { + if (table_list->attached_history) { + //nothing + } else if (lex->sql_command == SQLCOM_DELETE) { + if (!thd->security_context()->check_access(SUPER_ACL) || !thd->is_admin_conn()) { + my_error(ER_HIST_TABLE_DELETE_NOT_PERMITTED, MYF(0), table_list->table_name); + return true; + } + } else if (lex->sql_command != SQLCOM_SHOW_CREATE && + lex->sql_command != SQLCOM_SHOW_FIELDS && + lex->sql_command != SQLCOM_CHECK && + !(lex->sql_command == SQLCOM_CREATE_TABLE && + lex->create_info->options & HA_LEX_CREATE_TABLE_LIKE)) { + my_error(ER_HIST_TABLE_NOT_ACCESSIBLE_DIRECTLY, MYF(0), table_list->table_name); + return true; + } + } + + /* Only temporal select is allowed. */ + if (table_list->temporal_hint && lex->sql_command != SQLCOM_SELECT) { + my_error(ER_ONLY_SELECT_SUPPORTS_TEMPORAL, MYF(0)); + return true; + } + /* Temporal select only works on original tables. */ + if (table_list->temporal_hint && !table_list->orig_tbl) { + my_error(ER_ONLY_ORIG_TABLE_SUPPORTS_TEMPORAL, MYF(0), table_list->table_name); + return true; + } + + return false; +} + + /** Open a base table. 
@@ -3392,6 +3443,11 @@ retry_share: DBUG_RETURN(TRUE); } } + if (set_and_check_temporal(thd, table_list, NULL, table)) { + tc->release_table(thd, table); + tc->unlock(); + DBUG_RETURN(TRUE); + } tc->unlock(); /* Call rebind_psi outside of the critical section. */ @@ -3575,6 +3631,13 @@ share_found: sizeof(*table), MYF(MY_WME)))) goto err_lock; + /* Flashback */ + if (set_and_check_temporal(thd, table_list, table_def, NULL)) { + goto err_lock; + } + if (table_list->hist_tbl) + share->need_trx_id_index = true; + error= open_table_from_share(thd, share, alias, ((flags & MYSQL_OPEN_NO_NEW_TABLE_IN_SE) ? 0 : @@ -8546,6 +8609,10 @@ find_field_in_tables(THD *thd, Item_iden for (; cur_table != last_table ; cur_table= cur_table->next_name_resolution_table) { + /* Skip history tables added in make_his_table_list */ + if (cur_table->hist_tbl && cur_table->attached_history) + continue; + Field *cur_field= find_field_in_table_ref(thd, cur_table, name, length, item->item_name.ptr(), db, table_name, ref, @@ -9817,7 +9884,9 @@ insert_fields(THD *thd, Name_resolution_ if ((table_name && my_strcasecmp(table_alias_charset, table_name, tables->alias)) || - (db_name && strcmp(tables->db,db_name))) + (db_name && strcmp(tables->db,db_name)) || + /* Skip history table added in make_his_table_list. 
*/ + (tables->hist_tbl && tables->attached_history)) continue; /* diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_base.h mysql-8.0.3-rc/sql/sql_base.h --- mysql-8.0.3-rc/sql/sql_base.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_base.h 2020-04-19 23:52:39.598543834 +0800 @@ -573,4 +573,9 @@ private: bool m_has_protection_against_grl; }; +/* Flashback */ +#define HISTORY_TABLE_POSTFIX "_history" +#define HISTORY_TABLE_POSTFIX_LENGTH 8 +#define DEFAULT_HISTORY_TABLESPACE "ibhistory" + #endif /* SQL_BASE_INCLUDED */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_class.cc mysql-8.0.3-rc/sql/sql_class.cc --- mysql-8.0.3-rc/sql/sql_class.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_class.cc 2020-05-03 10:33:47.576764233 +0800 @@ -2997,3 +2997,165 @@ bool THD::is_current_stmt_binlog_row_ena is_current_stmt_binlog_format_row() && !is_current_stmt_binlog_disabled()); } + +/* Flashback */ +bool check_table_innodb(TABLE *table) +{ + if (table) + return table->file->ht->db_type == DB_TYPE_INNODB; + else + return false; +} + +MYSQL_TEMPORAL_HINT thd_get_table_temporal_hint(MYSQL_THD thd, const char *tbl_name) +{ + Temporal_hint *temporal_hint= NULL; + TABLE *table= NULL; + + if (!(likely(get_server_state() == SERVER_OPERATING))) + return NULL; + + SELECT_LEX *select_lex= thd->lex->current_select(); + if (select_lex == NULL) + return NULL; + + std::string his_tbl_name(tbl_name); + his_tbl_name.append(HISTORY_TABLE_POSTFIX); + + TABLE_LIST *table_list= select_lex->get_table_list(); + + while(table_list) + { + std::string db_tbl= table_list->db; + db_tbl.append("/").append(table_list->table_name); + + /* Original table found. */ + if (!strcmp(db_tbl.c_str(), tbl_name)) + { + if (!check_table_innodb(table_list->table)) + return NULL; + + temporal_hint= table_list->temporal_hint; + } + /* History table found. 
*/ + else if(db_tbl == his_tbl_name) + { + table= table_list->table; + + if (!check_table_innodb(table)) + return NULL; + } + + if (temporal_hint && table) + break; + + table_list= table_list->next_local; + } + + if (temporal_hint) + temporal_hint->table= table; + + return temporal_hint; +} + +/** + Prepare a Table_ident and add a history table_list into SELECT_LEX + + @param thd Thread + @param sel Instance of SELECT_LEX + @param db_name Database name + @param table_name Table name + + @returns the TABLE_LIST pointer obtained from add_table_to_list() for the + history table, marked with attached_history (not a bool status). +*/ +TABLE_LIST* make_his_table_list(THD *thd, SELECT_LEX *sel, + const LEX_CSTRING &db_name, + const LEX_CSTRING &table_name) +{ + DBUG_ASSERT(thd->stmt_arena->state != Query_arena::STMT_PREPARED && + thd->stmt_arena->state != Query_arena::STMT_EXECUTED); + + Query_arena arena_backup; + Table_ident *table_ident; + + /* Prepare clause, + or call procedure. */ + if (thd->stmt_arena->is_stmt_prepare() || + thd->stmt_arena->state == Query_arena::STMT_INITIALIZED_FOR_SP) + { + // Set thd.mem_root to thd.stmt_arena.mem_root. + thd->set_n_backup_active_arena(thd->stmt_arena, &arena_backup); + + // Alloc db_name_copy and table_name_copy on thd.stmt_arena.mem_root. 
+ LEX_STRING db_name_copy, table_name_copy; + lex_string_copy(thd->mem_root, &db_name_copy, db_name.str, db_name.length); + lex_string_copy(thd->mem_root, &table_name_copy, table_name.str, table_name.length); + + LEX_CSTRING db_cstr, table_cstr; + db_cstr.str= static_cast(db_name_copy.str); + db_cstr.length= db_name_copy.length; + table_cstr.str= static_cast(table_name_copy.str); + table_cstr.length= table_name_copy.length; + + table_ident= new (thd->mem_root) Table_ident(thd->get_protocol(), + db_cstr, + table_cstr, 1); + } + else + { + table_ident= new (thd->mem_root) Table_ident(thd->get_protocol(), + db_name, table_name, 1); + } + + TABLE_LIST *tbl= sel->add_table_to_list(thd, table_ident, 0, 0, + TL_READ_DEFAULT, MDL_SHARED_READ, + 0, 0, 0, NULL); + /* Prepare clause, + or call procedure. */ + if (thd->stmt_arena->is_stmt_prepare() || + thd->stmt_arena->state == Query_arena::STMT_INITIALIZED_FOR_SP) + { + // Reset thd.mem_root from thd.stmt_arena.mem_root. + thd->restore_active_arena(thd->stmt_arena, &arena_backup); + } + + /* Raise ER_HIST_TABLE_NOT_ACCESSIBLE_DIRECTLY + when access history table in sql statement, + while not to do so in temporal select. 
*/ + if (thd->get_stmt_da()->is_error() && + thd->get_stmt_da()->mysql_errno() == ER_HIST_TABLE_NOT_ACCESSIBLE_DIRECTLY) + { + thd->get_stmt_da()->remove_warning(); + thd->get_stmt_da()->reset_diagnostics_area(); + } + /* Callers test the result for NULL, so do not dereference a failed add_table_to_list(). */ + if (tbl) tbl->attached_history= true; + + return tbl; +} + +bool thd_acquire_flashback_lock(MYSQL_THD thd, MDL_ticket **mdl) +{ + MDL_request mdl_request; + MDL_REQUEST_INIT(&mdl_request, MDL_key::FLASHBACK, "", "", + MDL_INTENTION_EXCLUSIVE, MDL_EXPLICIT); + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) { + return true; + } + + if (mdl) *mdl= mdl_request.ticket; + return false; +} + +void thd_release_flashback_lock(MYSQL_THD thd, MDL_ticket **mdl) +{ + /* Tolerate a NULL out-pointer, as thd_acquire_flashback_lock() does. */ + if (mdl == nullptr || *mdl == nullptr) { + return; + } + + thd->mdl_context.release_lock(*mdl); + *mdl= nullptr; +} diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_class.h mysql-8.0.3-rc/sql/sql_class.h --- mysql-8.0.3-rc/sql/sql_class.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_class.h 2020-04-19 23:52:39.606543834 +0800 @@ -4189,6 +4189,9 @@ public: Returns the current waiting_for_disk_space flag value.
*/ bool is_waiting_for_disk_space() const { return waiting_for_disk_space; } + + /* Flashback: Check for admin connection (false when the session has no Vio) */ + bool is_admin_conn() const { return net.vio != nullptr && net.vio->type == VIO_TYPE_SOCKET; } }; @@ -4583,6 +4586,14 @@ inline void reattach_engine_ha_data_to_t } } +MYSQL_TEMPORAL_HINT thd_get_table_temporal_hint(MYSQL_THD thd, + const char *tbl_name); +TABLE_LIST* make_his_table_list(THD *thd, SELECT_LEX *sel, + const LEX_CSTRING &db_name, + const LEX_CSTRING &table_name); + +bool thd_acquire_flashback_lock(MYSQL_THD thd, MDL_ticket **mdl); +void thd_release_flashback_lock(MYSQL_THD thd, MDL_ticket **mdl); /*************************************************************************/ #endif /* SQL_CLASS_INCLUDED */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_cmd_dml.h mysql-8.0.3-rc/sql/sql_cmd_dml.h --- mysql-8.0.3-rc/sql/sql_cmd_dml.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_cmd_dml.h 2020-04-19 23:52:39.594543835 +0800 @@ -116,6 +116,19 @@ protected: LEX *lex; ///< Pointer to LEX for this statement Query_result *result; ///< Pointer to object for handling of the result bool m_empty_query; ///< True if query will produce no rows + +/* Flashback */ +protected: +/** + Find history table to be opened in open_tables_for_query().
+ + @param thd the current thread + + @returns false if success, true if error +*/ + virtual bool prepare_temporal(THD *thd MY_ATTRIBUTE((unused))) { return false; }; }; +bool dml_prepare_temporal(THD* thd); + #endif /* SQL_CMD_DML_INCLUDED */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_delete.cc mysql-8.0.3-rc/sql/sql_delete.cc --- mysql-8.0.3-rc/sql/sql_delete.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_delete.cc 2020-04-19 23:52:39.594543835 +0800 @@ -69,6 +69,7 @@ #include "sql/trigger_def.h" #include "sql/uniques.h" // Unique #include "sql_string.h" +#include "transaction.h" //Flashback: trans_commit, trans_commit_stmt class COND_EQUAL; class Item_exists_subselect; @@ -640,6 +641,12 @@ cleanup: { my_ok(thd, deleted_rows); DBUG_PRINT("info",("%ld records deleted",(long) deleted_rows)); + /* Flashback: Delete on history table is automatically committed always. */ + if (delete_table_ref->hist_tbl) + { + trans_commit_stmt(thd); + trans_commit(thd); + } } DBUG_RETURN(error > 0); } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_delete.h mysql-8.0.3-rc/sql/sql_delete.h --- mysql-8.0.3-rc/sql/sql_delete.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_delete.h 2020-04-19 23:52:39.598543834 +0800 @@ -117,6 +117,8 @@ protected: bool execute_inner(THD *thd) override; + virtual bool prepare_temporal(THD *thd) { return dml_prepare_temporal(thd); } + private: bool delete_from_single_table(THD *thd); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_error.cc mysql-8.0.3-rc/sql/sql_error.cc --- mysql-8.0.3-rc/sql/sql_error.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_error.cc 2020-04-19 23:52:39.594543835 +0800 @@ -727,6 +727,17 @@ void Diagnostics_area::push_diagnostics_ } +void Diagnostics_area::remove_warning() +{ + Sql_condition* condition= error_condition(); + + m_conditions_list.remove(condition); 
+ + m_current_statement_cond_count_by_sl[(uint)condition->severity()]--; + m_current_statement_cond_count--; +} + + Diagnostics_area *Diagnostics_area::pop_diagnostics_area() { DBUG_ASSERT(m_stacked_da); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_error.h mysql-8.0.3-rc/sql/sql_error.h --- mysql-8.0.3-rc/sql/sql_error.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_error.h 2020-04-19 23:52:39.614543834 +0800 @@ -710,6 +710,10 @@ private: ulong m_saved_warn_count; friend class THD; + +/** Flashback: Remove warning from m_conditions_list */ +public: + void remove_warning(); }; /////////////////////////////////////////////////////////////////////////// diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_executor.cc mysql-8.0.3-rc/sql/sql_executor.cc --- mysql-8.0.3-rc/sql/sql_executor.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_executor.cc 2020-04-19 23:52:39.590543835 +0800 @@ -1671,7 +1671,17 @@ sub_select(JOIN *join, QEP_TAB *const qe if (error > 0 || (join->thd->is_error())) // Fatal error rc= NESTED_LOOP_ERROR; else if (error < 0) - break; + { + if (error == -2) // Flashback + { + in_first_read= true; + continue; + } + else + { + break; + } + } else if (join->thd->killed) // Aborted by user { join->thd->send_kill_message(); @@ -2223,6 +2233,12 @@ evaluate_null_complemented_join_record(J int report_handler_error(TABLE *table, int error) { + /* Flashback */ + if (error == HA_END_OF_ORIG_SCAN) + { + return -2; + } + if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND) { table->set_no_row(); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_initialize.cc mysql-8.0.3-rc/sql/sql_initialize.cc --- mysql-8.0.3-rc/sql/sql_initialize.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_initialize.cc 2020-04-19 23:52:39.606543834 +0800 @@ -77,6 +77,13 @@ static const char *session_service_initi NULL 
}; +/* Flashback */ +static const char *initialization_history_tablespace[] = +{ + "CREATE TABLESPACE `ibhistory` ADD DATAFILE 'ibhistory.ibd' Engine=InnoDB;\n", + NULL +}; + static const char** cmds[]= { initialization_cmds, @@ -86,6 +93,7 @@ static const char** cmds[]= fill_help_tables, session_service_initialization_data, mysql_sys_schema, + initialization_history_tablespace, NULL }; @@ -99,6 +107,7 @@ static const char *cmd_descs[]= "Filling in the mysql.help table", "Creating user for internal session service", "Creating the sys schema", + "Creating default history tablespace", NULL }; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_join_buffer.cc mysql-8.0.3-rc/sql/sql_join_buffer.cc --- mysql-8.0.3-rc/sql/sql_join_buffer.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_join_buffer.cc 2020-05-03 10:36:43.940757451 +0800 @@ -2044,6 +2044,7 @@ enum_nested_loop_state JOIN_CACHE_BNL::j READ_RECORD *info= &qep_tab->read_record; do { + if (error == -2) continue; // Flashback if (qep_tab->keep_current_rowid) qep_tab->table()->file->position(qep_tab->table()->record[0]); @@ -2090,7 +2091,7 @@ enum_nested_loop_state JOIN_CACHE_BNL::j } } } - } while (!(error= info->read_record(info))); + } while (!(error= info->read_record(info)) || error == -2); if (error > 0) // Fatal error rc= NESTED_LOOP_ERROR; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_lex.cc mysql-8.0.3-rc/sql/sql_lex.cc --- mysql-8.0.3-rc/sql/sql_lex.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_lex.cc 2020-04-19 23:52:39.610543834 +0800 @@ -518,6 +518,8 @@ void LEX::reset() binlog_need_explicit_defaults_ts= false; m_extended_show= false; + is_temporal_query= false; //flashback + clear_privileges(); } @@ -1401,6 +1403,12 @@ int MYSQLlex(YYSTYPE *yylval, YYLTYPE *y yylloc->raw.end= lip->get_ptr(); lip->add_digest_token(WITH_ROLLUP_SYM, yylval); return WITH_ROLLUP_SYM; + /* Flashback */ + case 
TEMPORAL_SYM: + yylloc->cpp.end= lip->get_cpp_ptr(); + yylloc->raw.end= lip->get_ptr(); + lip->add_digest_token(WITH_TEMPORAL_SYM, yylval); + return WITH_TEMPORAL_SYM; default: /* Save the token following 'WITH' diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_lex.h mysql-8.0.3-rc/sql/sql_lex.h --- mysql-8.0.3-rc/sql/sql_lex.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_lex.h 2020-05-03 10:38:41.660752923 +0800 @@ -145,6 +145,40 @@ const size_t INITIAL_LEX_PLUGIN_LIST_SIZ enum class partition_type; // from partition_element.h enum class enum_key_algorithm; // from partition_info.h +/* Flashback: value parsed from a literal (a number when flag is set, otherwise a DATETIME converted by mktime()) plus caller-supplied bounds */ +class Convert_info { + public: + Convert_info() : trxortime(0), lower(0), upper(0) {} + Convert_info(THD *thd, const LEX_STRING &lexstr, ulonglong lower, ulonglong upper, bool flag) + : trxortime(0), lower(lower), upper(upper) { + if (flag) { + trxortime = strtoull(lexstr.str, NULL, 10); + } else { + Item *tmp_item= create_temporal_literal(thd, static_cast(lexstr.str), + lexstr.length, system_charset_info, + MYSQL_TYPE_DATETIME, true); + if (tmp_item) { + MYSQL_TIME mysql_time; + (void)tmp_item->get_date(&mysql_time, 0); + struct tm tm = { .tm_sec = (int)mysql_time.second, + .tm_min = (int)mysql_time.minute, + .tm_hour = (int)mysql_time.hour, + .tm_mday = (int)mysql_time.day, + /* struct tm counts months from 0 and years from 1900 */ + .tm_mon = (int)mysql_time.month - 1, + .tm_year = (int)mysql_time.year - 1900 }; + tm.tm_isdst = -1; /* let mktime() decide whether DST applies */ + trxortime = (ulonglong)mktime(&tm); + } + } + } + + public: + ulonglong trxortime; + ulonglong lower; + ulonglong upper; +}; + /* There are 8 different type of table access so there is no more than combinations 2^8 = 256: @@ -2245,6 +2279,8 @@ union YYSTYPE { bool resource_group_flag_type; resourcegroups::Type resource_group_type; Trivial_array *thread_id_list_type; + /* Flashback */ + class Temporal_hint *temporal_hint; }; static_assert(sizeof(YYSTYPE) <= 32, "YYSTYPE is too big"); @@ -3609,6 +3645,10 @@ public: bool
locate_var_assignment(const Name_string &name); + /* Flashback*/ + bool is_temporal_query; + Convert_info *convert_info; + void insert_values_map(Field *f1, Field *f2) { if (!insert_update_values_map) diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_optimizer.cc mysql-8.0.3-rc/sql/sql_optimizer.cc --- mysql-8.0.3-rc/sql/sql_optimizer.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_optimizer.cc 2020-05-03 10:39:19.696751461 +0800 @@ -2760,10 +2760,12 @@ void JOIN::adjust_access_methods() from) 2) Covering indexes are available 3) This isn't a derived table/materialized view + 4) Not temporal querying an original table, or transitional versions are not queried */ if (!tab->table()->no_keyread && // 1 !tab->table()->covering_keys.is_clear_all() && // 2 - !tl->uses_materialization()) // 3 + !tl->uses_materialization() && // 3 + !(tl->orig_tbl && tl->temporal_hint)) // 4 { /* It has turned out that the change commented out below, while speeding diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_parse.cc mysql-8.0.3-rc/sql/sql_parse.cc --- mysql-8.0.3-rc/sql/sql_parse.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_parse.cc 2020-05-03 10:44:00.120788677 +0800 @@ -3023,6 +3023,177 @@ mysql_execute_command(THD *thd, bool fir break; } + /* Flashback */ + case SQLCOM_TRXTOTIME: + { + + List field_list; + Diagnostics_area new_stmt_da(false); + thd->push_diagnostics_area(&new_stmt_da); + new_stmt_da.reset_statement_cond_count(); + field_list.push_back(new Item_return_int("TRX_ID", 8, MYSQL_TYPE_LONGLONG)); + field_list.push_back(new Item_empty_string("START_TIME", 64)); + field_list.push_back(new Item_empty_string("FINISH_TIME", 64)); + field_list.push_back(new Item_empty_string("STATUS", 10)); + if (thd->send_result_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)){ + + } + bool rc= false; + + Protocol *protocol= thd->get_protocol(); + handlerton 
*db_type= ha_resolve_by_legacy_type(thd, DB_TYPE_INNODB); + handler *file= get_new_handler((TABLE_SHARE*)0, 0, thd->mem_root, db_type); + std::vector v; + file->trxtotime(thd, &v); + + for (auto iter= v.begin(); !rc && iter != v.end(); iter++) + { + protocol->start_row(); + protocol->store(iter->first); + char start[64]; /* stack buffers: the former new char[64] was never freed */ + char finish[64]; + struct tm p; + localtime_r(&iter->second.start_time.tv_sec, &p); + if (!strftime(start, sizeof(start), "%Y-%m-%d %H:%M:%S", &p)) + start[0]= '\0'; /* strftime() failed: send an empty string */ + localtime_r(&iter->second.finish_time.tv_sec, &p); + if (!strftime(finish, sizeof(finish), "%Y-%m-%d %H:%M:%S", &p)) + finish[0]= '\0'; + protocol->store(start, strlen(start), system_charset_info); + protocol->store(finish, strlen(finish), system_charset_info); + switch(iter->second.status) { + case TLOG_TRX_STATUS_NOT_STARTED: + protocol->store("UNDO", 4, + system_charset_info); + break; + case TLOG_TRX_STATUS_IN_PROGRESS: + protocol->store("INPROGRESS", 10, + system_charset_info); + break; + case TLOG_TRX_STATUS_ABORT: + protocol->store("ABORTED", 7, + system_charset_info); + break; + case TLOG_TRX_STATUS_COMMIT: + protocol->store("COMMITED", 8, + system_charset_info); + break; + default: + assert(0); + break; + } + if (protocol->end_row()) + rc= true; + } + + thd->pop_diagnostics_area(); + if(!rc) + my_eof(thd); + + destroy(file); + + break; + } + case SQLCOM_TIMETOTRX: + { + + List field_list; + Diagnostics_area new_stmt_da(false); + thd->push_diagnostics_area(&new_stmt_da); + new_stmt_da.reset_statement_cond_count(); + field_list.push_back(new Item_return_int("TRX_ID", 8, MYSQL_TYPE_LONGLONG)); + field_list.push_back(new Item_empty_string("START_TIME", 64)); + field_list.push_back(new Item_empty_string("FINISH_TIME", 64)); + field_list.push_back(new Item_empty_string("STATUS", 10)); + if (thd->send_result_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)){ + + } + bool rc= false; + Protocol *protocol= thd->get_protocol(); + handlerton *db_type=
ha_resolve_by_legacy_type(thd, DB_TYPE_INNODB); + handler *file= get_new_handler((TABLE_SHARE*)0, 0, thd->mem_root, db_type); + std::vector v; + file->timetotrx(thd, &v); + + for(auto iter= v.begin(); !rc && iter != v.end(); iter++) + { + protocol->start_row(); + protocol->store(iter->first); + char start[64]; /* stack buffers: the former new char[64] was never freed */ + char finish[64]; + struct tm p; + localtime_r(&iter->second.start_time.tv_sec, &p); + if (!strftime(start, sizeof(start), "%Y-%m-%d %H:%M:%S", &p)) + start[0]= '\0'; /* strftime() failed: send an empty string */ + localtime_r(&iter->second.finish_time.tv_sec, &p); + if (!strftime(finish, sizeof(finish), "%Y-%m-%d %H:%M:%S", &p)) + finish[0]= '\0'; + protocol->store(start, strlen(start), system_charset_info); + protocol->store(finish, strlen(finish), system_charset_info); + switch(iter->second.status) { + case TLOG_TRX_STATUS_NOT_STARTED: + protocol->store("UNDO", 4, + system_charset_info); + break; + case TLOG_TRX_STATUS_IN_PROGRESS: + protocol->store("INPROGRESS", 10, + system_charset_info); + break; + case TLOG_TRX_STATUS_ABORT: + protocol->store("ABORTED", 7, + system_charset_info); + break; + case TLOG_TRX_STATUS_COMMIT: + protocol->store("COMMITED", 8, + system_charset_info); + break; + default: + assert(0); + break; + } + if (protocol->end_row()) + rc= true; + } + thd->pop_diagnostics_area(); + if(!rc) + my_eof(thd); + + destroy(file); + + break; + } + case SQLCOM_CURRENTTRX: + { + List field_list; + Diagnostics_area new_stmt_da(false); + thd->push_diagnostics_area(&new_stmt_da); + new_stmt_da.reset_statement_cond_count(); + field_list.push_back(new Item_return_int("CURRENT_TRX_ID", 8, MYSQL_TYPE_LONGLONG)); + if (thd->send_result_metadata(&field_list, + Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)){ + + } + bool rc= false; + Protocol *protocol= thd->get_protocol(); + handlerton *db_type= ha_resolve_by_legacy_type(thd, DB_TYPE_INNODB); + handler *file= get_new_handler((TABLE_SHARE*)0, 0, thd->mem_root, db_type); + ulonglong trx_id= 0; + file->currenttrx(thd, &trx_id); + + protocol->start_row(); +
protocol->store(trx_id); + if (protocol->end_row()) + rc= true; + thd->pop_diagnostics_area(); + if(!rc) + my_eof(thd); + + destroy(file); + + break; + } case SQLCOM_PREPARE: { mysql_sql_stmt_prepare(thd); @@ -4766,7 +4937,12 @@ finish: /* report error issued during command execution */ if (thd->killed) thd->send_kill_message(); - if (thd->is_error() || (thd->variables.option_bits & OPTION_MASTER_SQL_ERROR)) + + /* Flashback: Delete on history table is always automatically committed */ + if (lex->sql_command == SQLCOM_DELETE && + thd->lex->select_lex->get_table_list()->hist_tbl) + thd->get_stmt_da()->set_overwrite_status(false); + else if (thd->is_error() || (thd->variables.option_bits & OPTION_MASTER_SQL_ERROR)) trans_rollback_stmt(thd); else { @@ -5975,6 +6151,9 @@ bool PT_common_table_expr::match_table_r @param partition_names @param option @param pc Current parsing context, if available. + @param temporal_hint + @param attach_history_table false: operate history table directly, + true: operate history table indirectly. @return Pointer to TABLE_LIST element added to the total table list @retval diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_resolver.cc mysql-8.0.3-rc/sql/sql_resolver.cc --- mysql-8.0.3-rc/sql/sql_resolver.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_resolver.cc 2020-04-19 23:52:39.594543835 +0800 @@ -727,6 +727,10 @@ static TABLE_LIST **make_leaf_tables(TAB { for (TABLE_LIST *table= tables; table; table= table->next_local) { + /* Flashback */ + if (table->hist_tbl && table->attached_history) + continue; + // A mergable view is not allowed to have a table pointer. 
DBUG_ASSERT(!(table->is_view() && table->is_merged() && table->table)); if (table->merge_underlying_list) diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_select.cc mysql-8.0.3-rc/sql/sql_select.cc --- mysql-8.0.3-rc/sql/sql_select.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_select.cc 2020-05-03 10:46:22.504783201 +0800 @@ -371,6 +371,44 @@ bool Sql_cmd_dml::prepare(THD *thd) (void) unit->cleanup(false); DBUG_RETURN(true); } + + /* Flashback: + Here to append history tables into lex->query_tables for sel/upd/del. */ + if (lex->is_temporal_query && + thd->stmt_arena->state != Query_arena::STMT_PREPARED && + thd->stmt_arena->state != Query_arena::STMT_EXECUTED) + { + if (prepare_temporal(thd) || precheck(thd)) + goto err; + /* Find the first history table to be opened. */ + TABLE_LIST* lex_query_tbl_iter; + bool has_orig_tbl= false; + for (lex_query_tbl_iter= lex->query_tables; + lex_query_tbl_iter != NULL && lex_query_tbl_iter->table; + lex_query_tbl_iter= lex_query_tbl_iter->next_global) + { + if (lex_query_tbl_iter->orig_tbl) + has_orig_tbl= true; + } + if (has_orig_tbl && lex_query_tbl_iter) { + uint orig_table_count= thd->lex->table_count; + TABLE_LIST* lex_query_tables= lex->query_tables; + lex->query_tables= lex_query_tbl_iter; + if (open_tables_for_query(thd, lex_query_tbl_iter, + needs_explicit_preparation() ? 
+ MYSQL_OPEN_FORCE_SHARED_MDL : 0)) + { + lex->query_tables= lex_query_tables; + if (thd->is_error()) + goto err; + (void) unit->cleanup(false); + DBUG_RETURN(true); + } + thd->lex->table_count+= orig_table_count; + lex->query_tables= lex_query_tables; + } + } + #ifndef DBUG_OFF if (sql_command_code() == SQLCOM_SELECT) DEBUG_SYNC(thd, "after_table_open"); @@ -518,6 +556,8 @@ bool Sql_cmd_dml::execute(THD *thd) bool error_handler_active= false; bool res; + MDL_request mdl_request; + Ignore_error_handler ignore_handler; Strict_error_handler strict_handler; @@ -529,6 +569,16 @@ bool Sql_cmd_dml::execute(THD *thd) if (is_timer_applicable_to_statement(thd)) statement_timer_armed= set_statement_timer(thd); + if (lex->is_temporal_query) + { + MDL_REQUEST_INIT(&mdl_request, MDL_key::FLASHBACK, "", "", MDL_SHARED, + MDL_EXPLICIT); + + if (thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + goto err; + } + if (!is_prepared()) { prepare_only= false; // Indicate that call is from execute @@ -632,6 +682,9 @@ bool Sql_cmd_dml::execute(THD *thd) DBUG_EXECUTE_IF("use_attachable_trx", thd->end_attachable_transaction();); + if (lex->is_temporal_query && mdl_request.ticket) + thd->mdl_context.release_lock(mdl_request.ticket); + if (statement_timer_armed && thd->timer) reset_statement_timer(thd); @@ -651,6 +704,10 @@ bool Sql_cmd_dml::execute(THD *thd) err: DBUG_ASSERT(thd->is_error() || thd->killed); DBUG_PRINT("info",("report_error: %d", thd->is_error())); + + if (lex->is_temporal_query && mdl_request.ticket) + thd->mdl_context.release_lock(mdl_request.ticket); + THD_STAGE_INFO(thd, stage_end); prepare_only= true; @@ -5392,3 +5449,51 @@ join_type calc_join_type(int quick_type) /** @} (end of group Query_Optimizer) */ + +/** +Find history table to be opened in open_tables_for_query(). +Update/delete/select require history tables opened. +Insert in an original table doesn't require history table opened.
+*/ +bool dml_prepare_temporal(THD *thd) +{ + if (get_server_state() == SERVER_BOOTING) { + return false; + } + + bool res= false; + + LEX *lex= thd->lex; + TABLE_LIST *table_list= lex->query_tables; + + while(table_list) + { + if (table_list->orig_tbl) + { + std::string his_tbl_name(table_list->table_name); + his_tbl_name.append(HISTORY_TABLE_POSTFIX); + + LEX_CSTRING his_tbl_name_lex_cstr; + LEX_CSTRING his_db_name_lex_cstr; + + if(!thd->make_lex_string(&his_tbl_name_lex_cstr, his_tbl_name.data(), + his_tbl_name.length(), FALSE) || + !thd->make_lex_string(&his_db_name_lex_cstr, table_list->db, + table_list->db_length, FALSE)) + { + res= true; + break; + } + + if (nullptr == make_his_table_list(thd, lex->select_lex, + his_db_name_lex_cstr, + his_tbl_name_lex_cstr)) { + res = true; + break; + } + } + table_list= table_list->next_global; + } + + return res; +} diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_select.h mysql-8.0.3-rc/sql/sql_select.h --- mysql-8.0.3-rc/sql/sql_select.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_select.h 2020-04-19 23:52:39.610543834 +0800 @@ -78,6 +78,8 @@ protected: virtual bool precheck(THD *thd); virtual bool prepare_inner(THD *thd); + + virtual bool prepare_temporal(THD *thd) { return dml_prepare_temporal(thd); } }; /** diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_show.cc mysql-8.0.3-rc/sql/sql_show.cc --- mysql-8.0.3-rc/sql/sql_show.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_show.cc 2020-04-19 23:52:39.598543834 +0800 @@ -1623,7 +1623,9 @@ int store_create_info(THD *thd, TABLE_LI show_table_options= TRUE; /* TABLESPACE and STORAGE */ - if (share->tablespace || + /* Flashback: No need to show default history tablespace */ + if ((share->tablespace && + (0 != strcmp(share->tablespace, DEFAULT_HISTORY_TABLESPACE))) || share->default_storage_media != HA_SM_DEFAULT) { packet->append(STRING_WITH_LEN(" /*!50100")); @@ 
-1789,6 +1791,14 @@ int store_create_info(THD *thd, TABLE_LI packet->append(STRING_WITH_LEN(" CONNECTION=")); append_unescaped(packet, share->connect_string.str, share->connect_string.length); } + /* Flashback */ + if ((table_list->orig_tbl == true) && (table_list->hist_tbl == false)) + { + packet->append(STRING_WITH_LEN(" FLASHBACK")); + packet->append(STRING_WITH_LEN(" /*FLASHBACK ORIGINAL*/")); + } + if ((table_list->orig_tbl == false) && (table_list->hist_tbl == true)) + packet->append(STRING_WITH_LEN(" /*FLASHBACK HISTORICAL*/")); append_directory(thd, packet, "DATA", create_info.data_file_name); append_directory(thd, packet, "INDEX", create_info.index_file_name); } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_table.cc mysql-8.0.3-rc/sql/sql_table.cc --- mysql-8.0.3-rc/sql/sql_table.cc 2019-12-05 20:21:17.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_table.cc 2020-05-03 15:59:06.234660698 +0800 @@ -7257,29 +7257,17 @@ bool collect_fk_parents_for_new_fks(THD return false; } - -/** - Implementation of SQLCOM_CREATE_TABLE. - - Take the metadata locks (including a shared lock on the affected - schema) and create the table. Is written to be called from - mysql_execute_command(), to which it delegates the common parts - with other commands (i.e. implicit commit before and after, - close of thread tables. 
-*/ - -bool mysql_create_table(THD *thd, TABLE_LIST *create_table, - HA_CREATE_INFO *create_info, - Alter_info *alter_info) +bool mysql_create_table_inner(THD *thd, TABLE_LIST *create_table, + HA_CREATE_INFO *create_info, + Alter_info *alter_info, + handlerton *&post_ddl_ht, /* in/out: by reference so an assignment made here reaches the caller */ + Foreign_key_parents_invalidator &fk_invalidator, + bool &go_to_end) { bool result; bool is_trans= FALSE; uint not_used; - handlerton *post_ddl_ht= nullptr; - Foreign_key_parents_invalidator fk_invalidator; - DBUG_ENTER("mysql_create_table"); - - dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client()); + DBUG_ENTER("mysql_create_table_inner"); /* Open or obtain "X" MDL lock on the table being created. @@ -7289,6 +7277,7 @@ bool mysql_create_table(THD *thd, TABLE_ if (open_tables(thd, &thd->lex->query_tables, &not_used, 0)) { result= true; + go_to_end= true; goto end; } @@ -7333,6 +7322,7 @@ bool mysql_create_table(THD *thd, TABLE_ thd->variables.lock_wait_timeout))) { result= true; + go_to_end= true; goto end; } } @@ -7372,9 +7362,10 @@ bool mysql_create_table(THD *thd, TABLE_ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) thd->get_transaction()->mark_created_temp_table(Transaction_ctx::STMT); - if (!thd->is_current_stmt_binlog_format_row() || + if ((!thd->is_current_stmt_binlog_format_row() || (thd->is_current_stmt_binlog_format_row() && - !(create_info->options & HA_LEX_CREATE_TMP_TABLE))) + !(create_info->options & HA_LEX_CREATE_TMP_TABLE))) && + !(thd->lex->create_info->options & HA_LEX_CREATING_FLASHBACK_TABLE)) { thd->add_to_binlog_accessed_dbs(create_table->db); result= write_bin_log(thd, true, @@ -7429,12 +7420,144 @@ bool mysql_create_table(THD *thd, TABLE_ result= update_referencing_views_metadata(thd, create_table, !is_trans, &uncommitted_tables); } + } - /* - Unless we are executing CREATE TEMPORARY TABLE we need to commit - changes to the data-dictionary, SE and binary log and possibly run - handlerton's post-DDL hook.
- */ +end: + DBUG_RETURN(result); +} + +bool mysql_prepare_create_history_table( + THD *thd, + HA_CREATE_INFO *create_info) +{ + DBUG_ENTER("mysql_prepare_create_history_table"); + + LEX *lex= thd->lex; + + DBUG_ASSERT(lex->create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE); + + std::string tmp_str(lex->query_tables->table_name, lex->query_tables->table_name_length); + tmp_str.append(HISTORY_TABLE_POSTFIX); + + LEX_CSTRING his_db_name; + LEX_CSTRING his_tbl_name; + if (!thd->make_lex_string(&his_db_name, lex->query_tables->db, lex->query_tables->db_length, FALSE) || + !thd->make_lex_string(&his_tbl_name, tmp_str.data(), tmp_str.length(), FALSE)) { + DBUG_RETURN(true); + } + + TABLE_LIST* table= make_his_table_list(thd, lex->select_lex, his_db_name, his_tbl_name); + if (!table) { + DBUG_RETURN(true); + } + table->open_strategy= TABLE_LIST::OPEN_FOR_CREATE; + + lex->query_tables= table; + lex->create_info->options|= HA_LEX_CREATING_FLASHBACK_TABLE; + create_info->options|= HA_LEX_CREATING_FLASHBACK_TABLE; + + DBUG_RETURN(false); +} + +/** + Implementation of SQLCOM_CREATE_TABLE. + + Take the metadata locks (including a shared lock on the affected + schema) and create the table. Is written to be called from + mysql_execute_command(), to which it delegates the common parts + with other commands (i.e. implicit commit before and after, + close of thread tables. 
+*/ + +bool mysql_create_table(THD *thd, TABLE_LIST *create_table, + HA_CREATE_INFO *create_info, + Alter_info *alter_info) +{ + bool result; + bool go_to_end= false; + handlerton *post_ddl_ht= nullptr; + Foreign_key_parents_invalidator fk_invalidator; + DBUG_ENTER("mysql_create_table"); + + dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client()); + + /* Flashback */ + if (!(create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE) && + create_info->hist_tablespace) + { + my_error(ER_SYNTAX_ERROR, MYF(0)); + DBUG_RETURN(true); + } + + result= mysql_create_table_inner(thd, create_table, + create_info, alter_info, + post_ddl_ht, fk_invalidator, + go_to_end); + + if (go_to_end) + goto mysql_create_table_end; + + if (likely(get_server_state() == SERVER_OPERATING) && + thd->lex->create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE && + !result) + { + if (mysql_prepare_create_history_table(thd, create_info)) { + DBUG_RETURN(true); + } + create_table= thd->lex->query_tables; + /** + If we didn't get a hist_tablespace name from create_info, we assign + the default history tablespace to history table, else we assign + create_info->hist_tablespace to create_info->tablespace and create + tablespace in original logic. We can just treat hist_tablespace here + as temp name of real tablespace to create. + */ + if (create_info->hist_tablespace == NULL) + { + create_info->tablespace= DEFAULT_HISTORY_TABLESPACE; + if (!thd->make_lex_string(&create_table->target_tablespace_name, + create_info->tablespace, + strlen(create_info->tablespace), false)) + DBUG_RETURN(true); + } + else if (create_info->hist_tablespace != NULL) + { + /** + Flashback + + Normally, we validate tablespace in Sql_cmd_create_table::execute, + however, we have passed this step when difine history tablespace. + So, do it again here. 
+ */ + if (validate_tablespace_name_length(create_info->hist_tablespace) || + validate_tablespace_name(false, create_info->hist_tablespace, + create_info->db_type)) + DBUG_RETURN(true); + + create_info->tablespace= create_info->hist_tablespace; + + if (!thd->make_lex_string(&create_table->target_tablespace_name, + create_info->tablespace, + strlen(create_info->tablespace), false)) + DBUG_RETURN(true); + } + + result= mysql_create_table_inner(thd, create_table, + create_info, alter_info, + post_ddl_ht, fk_invalidator, + go_to_end); + + if (go_to_end) + goto mysql_create_table_end; + } + + /* + Unless we are executing CREATE TEMPORARY TABLE we need to commit + changes to the data-dictionary, SE and binary log and possibly run + handlerton's post-DDL hook. + */ + if (!(create_info->options & HA_LEX_CREATE_TMP_TABLE)) + { if (!result && !thd->is_plugin_fake_ddl()) result= trans_commit_stmt(thd) || trans_commit_implicit(thd); @@ -7468,7 +7591,7 @@ bool mysql_create_table(THD *thd, TABLE_ } } -end: +mysql_create_table_end: DBUG_RETURN(result); } @@ -7944,6 +8067,7 @@ bool mysql_create_like_table(THD* thd, T */ if (open_tables(thd, &thd->lex->query_tables, ¬_used, 0)) DBUG_RETURN(true); + src_table->table->use_all_columns(); const dd::Table *src_table_obj= nullptr; @@ -8007,6 +8131,13 @@ bool mysql_create_like_table(THD* thd, T memset(&local_create_info, 0, sizeof(local_create_info)); local_create_info.db_type= src_table->table->s->db_type(); local_create_info.row_type= src_table->table->s->row_type; + + /* Flashback */ + if (src_table->orig_tbl) + local_create_info.options= create_info->options | HA_LEX_CREATE_FLASHBACK_TABLE; + if (src_table->hist_tbl) + local_create_info.options= create_info->options | HA_LEX_CREATING_FLASHBACK_TABLE; + if (mysql_prepare_alter_table(thd, src_table_obj, src_table->table, &local_create_info, &local_alter_info, &local_alter_ctx)) @@ -14725,3 +14856,35 @@ static bool check_engine(THD *thd, const DBUG_RETURN(false); } + +/** + @brief Get 
flashback attributes of the table from dd + Flashback + */ +TABLE_LIST *get_temporal_attribute(THD *thd, TABLE_LIST *table) +{ + DBUG_ENTER("get_temporal_attribute"); + + MDL_request mdl_request; + MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, table->db, + table->table_name, MDL_SHARED, MDL_TRANSACTION); + dd::Schema_MDL_locker mdl_locker(thd); + + dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client()); + + if (mdl_locker.ensure_locked(table->db) || + thd->mdl_context.acquire_lock(&mdl_request, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(nullptr); + + const dd::Abstract_table *at= nullptr; + bool acquire_failed= thd->dd_client()->acquire(table->db, table->table_name, &at); + + if (acquire_failed || at == nullptr) + DBUG_RETURN(nullptr); + + table->hist_tbl= at->get_hist_tbl(); + table->orig_tbl= at->get_orig_tbl(); + + DBUG_RETURN(table); +} diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_table.h mysql-8.0.3-rc/sql/sql_table.h --- mysql-8.0.3-rc/sql/sql_table.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_table.h 2020-04-19 23:52:39.582543835 +0800 @@ -505,4 +505,7 @@ extern MYSQL_PLUGIN_IMPORT const char *p bool lock_trigger_names(THD *thd, TABLE_LIST *tables); +/* Flashback */ +TABLE_LIST *get_temporal_attribute(THD *thd, TABLE_LIST *table); + #endif /* SQL_TABLE_INCLUDED */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_tablespace.cc mysql-8.0.3-rc/sql/sql_tablespace.cc --- mysql-8.0.3-rc/sql/sql_tablespace.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_tablespace.cc 2020-05-03 10:49:48.060775296 +0800 @@ -651,6 +651,18 @@ bool Sql_cmd_drop_tablespace::execute(TH } rollback_on_return.m_hton= hton; + /* + Flashback + + We can't drop the 'ibhistory' tablespace, cause + it's the default tablespace for history table. 
+ */ + if (strcmp(m_tablespace_name.str, DEFAULT_HISTORY_TABLESPACE) == 0) + { + my_error(ER_ALTER_DEFAULT_HISTORY_TABLESPACE, MYF(0), DEFAULT_HISTORY_TABLESPACE); /* report the name as reserved */ + return true; + } + if (validate_tablespace_name(true, m_tablespace_name.str, hton)) { return true; @@ -940,6 +952,17 @@ bool Sql_cmd_alter_tablespace_rename::ex { Rollback_guard rollback_on_return{thd}; + /* + Flashback + The ibhistory tablespace cannot be renamed, because + it is the default tablespace for history tables. + */ + if (strcmp(m_tablespace_name.str, DEFAULT_HISTORY_TABLESPACE) == 0) + { + my_error(ER_ALTER_DEFAULT_HISTORY_TABLESPACE, MYF(0), DEFAULT_HISTORY_TABLESPACE); + return true; + } + if (check_global_access(thd, CREATE_TABLESPACE_ACL)) { return true; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_update.h mysql-8.0.3-rc/sql/sql_update.h --- mysql-8.0.3-rc/sql/sql_update.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_update.h 2020-04-19 23:52:39.582543835 +0800 @@ -150,6 +150,8 @@ protected: bool execute_inner(THD *thd) override; + virtual bool prepare_temporal(THD *thd) { return dml_prepare_temporal(thd); } /* Flashback: forwards to dml_prepare_temporal */ + private: bool update_single_table(THD *thd); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/sql_yacc.yy mysql-8.0.3-rc/sql/sql_yacc.yy --- mysql-8.0.3-rc/sql/sql_yacc.yy 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/sql_yacc.yy 2020-05-03 10:50:51.768772846 +0800 @@ -1189,6 +1189,12 @@ bool my_yyoverflow(short **a, YYSTYPE ** %token RESOURCE_SYM /* MYSQL */ %token SYSTEM_SYM /* SQL-2003-R */ %token VCPU_SYM /* MYSQL */ +%token WITH_TEMPORAL_SYM /* Flashback: optional CREATE TABLE suffix */ +%token TEMPORAL_SYM /* Flashback */ +%token HIST_TABLESPACE_SYM /* Flashback: history tablespace */ +%token TRXTOTIME_SYM /* Flashback: TRXTOTIME statement */ +%token TIMETOTRX_SYM /* Flashback: TIMETOTRX statement */ +%token CURRENTTRX_SYM /* Flashback: CURRENTTRX statement */ /* @@ -1805,6 +1811,11 @@ bool my_yyoverflow(short **a, YYSTYPE ** ts_option_undo_buffer_size
ts_option_wait +/* Flashback: value types of the temporal nonterminals */ +%type opt_with_temporal opt_only_history +%type opt_temporal_hint +%type opt_temporal_hint_to_clause + %% /* @@ -2005,6 +2016,9 @@ simple_statement: | update_stmt { MAKE_CMD($1); } | use | xa + | trxtotime /* Flashback */ + | timetotrx /* Flashback */ + | currenttrx /* Flashback */ ; deallocate: @@ -2549,36 +2563,52 @@ opt_channel: } ; +/* Flashback: yields true when WITH_TEMPORAL_SYM is present, false otherwise */ +opt_with_temporal: + /* empty */ { $$= false; } + | WITH_TEMPORAL_SYM { $$= true; } + ; + create_table_stmt: CREATE opt_temporary TABLE_SYM opt_if_not_exists table_ident '(' table_element_list ')' opt_create_table_options_etc + /* Flashback: optional suffix, forwarded as the last constructor argument */ + opt_with_temporal { $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $2, $4, $5, $7, $9.opt_create_table_options, $9.opt_partitioning, $9.on_duplicate, - $9.opt_query_expression); + $9.opt_query_expression, + $10); } | CREATE opt_temporary TABLE_SYM opt_if_not_exists table_ident opt_create_table_options_etc + /* Flashback: optional suffix, forwarded as the last constructor argument */ + opt_with_temporal { $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $2, $4, $5, NULL, $6.opt_create_table_options, $6.opt_partitioning, $6.on_duplicate, - $6.opt_query_expression); + $6.opt_query_expression, + $7); } | CREATE opt_temporary TABLE_SYM opt_if_not_exists table_ident LIKE table_ident + /* Flashback: optional suffix, forwarded as the last constructor argument */ + opt_with_temporal { - $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $2, $4, $5, $7); + $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $2, $4, $5, $7, $8); } | CREATE opt_temporary TABLE_SYM opt_if_not_exists table_ident '(' LIKE table_ident ')' + /* Flashback: optional suffix, forwarded as the last constructor argument */ + opt_with_temporal { - $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $2, $4, $5, $8); + $$= NEW_PTN PT_create_table_stmt(YYMEM_ROOT, $2, $4, $5, $8, $10); } ; @@ -5795,6 +5825,10 @@ create_table_option: { $$= NEW_PTN PT_create_tablespace_option($3.str); } + | HIST_TABLESPACE_SYM opt_equal ident + { + $$= NEW_PTN PT_create_hist_tablespace_option($3.str); + } | STORAGE_SYM DISK_SYM { $$= NEW_PTN PT_create_storage_option(HA_SM_DISK); @@ -8784,6 +8818,29 @@
optional_braces: /* empty */ {} | '(' ')' {} ; +/* Flashback: the TRXTOTIME, TIMETOTRX and CURRENTTRX statements. Each rule sets lex->sql_command; the first two also store their three arguments in lex->convert_info. NOTE(review): convert_info is allocated with plain new, not NEW_PTN - confirm who frees it. */ +trxtotime: + TRXTOTIME_SYM '(' TEXT_STRING_literal ',' ulonglong_num ',' ulonglong_num ')' + { + LEX* lex= Lex; + lex->sql_command= SQLCOM_TRXTOTIME; + lex->convert_info= new Convert_info(YYTHD, $3, $5, $7, true); + } + +timetotrx: + TIMETOTRX_SYM '(' TEXT_STRING_literal ',' ulonglong_num ',' ulonglong_num ')' + { + LEX* lex= Lex; + lex->sql_command= SQLCOM_TIMETOTRX; + lex->convert_info= new Convert_info(YYTHD, $3, $5, $7, false); + } + +currenttrx: + CURRENTTRX_SYM '('')' + { + LEX* lex= Lex; + lex->sql_command= SQLCOM_CURRENTTRX; + } /* all possible expressions */ expr: @@ -10457,11 +10514,63 @@ single_table_parens: single_table: table_ident opt_use_partition opt_table_alias opt_key_definition + /* Flashback temporal hint for single table */ + opt_temporal_hint { - $$= NEW_PTN PT_table_factor_table_ident($1, $2, $3, $4); + $$= NEW_PTN PT_table_factor_table_ident($1, $2, $3, $4, $5); } ; +opt_temporal_hint: + /* empty */ + { + $$= NULL; + } + | SYSTEM_SYM TIME_SYM AS OF_SYM TEXT_STRING_literal opt_only_history + { + LEX* lex= Lex; + lex->is_temporal_query= true; + $$= NEW_PTN Temporal_hint($6); + $$->type= Temporal_hint_type::AS_OF_HINT; + $$->value.as_of_value= Temporal_hint::lexstr_to_unix_time(YYTHD, $5); + } + | SYSTEM_SYM TIME_SYM FROM TEXT_STRING_literal + opt_temporal_hint_to_clause opt_only_history + { + LEX* lex= Lex; + lex->is_temporal_query= true; + $$= NEW_PTN Temporal_hint($6); + $$->type= Temporal_hint_type::FROM_TO_HINT; + $$->value.from_to_value[0]= Temporal_hint::lexstr_to_unix_time(YYTHD, $4); + $$->value.from_to_value[1]= Temporal_hint::lexstr_to_unix_time(YYTHD, $5); /* NOTE(review): for TO CURRENT the clause value has a NULL str and zero length - confirm the conversion copes with that */ + } + | SYSTEM_SYM TRANSACTION_SYM ulonglong_num opt_only_history + { + LEX* lex= Lex; + lex->is_temporal_query= true; + $$= NEW_PTN Temporal_hint($4); + $$->type= Temporal_hint_type::TRX_ID_HINT; + $$->value.trx_id_value= $3; /* transaction id taken verbatim from the statement */ + } + ; + +opt_temporal_hint_to_clause: + TO_SYM CURRENT_SYM + { + $$.str= NULL; +
$$.length= 0; + } + | TO_SYM TEXT_STRING_literal + { + $$= $2; + } + ; + +opt_only_history: + /* empty */ { $$= false; } + | ONLY_SYM HISTORY_SYM { $$= true; } /* true when the hint ends with ONLY_SYM HISTORY_SYM */ + ; + joined_table_parens: '(' joined_table_parens ')' { $$= $2; } | '(' joined_table ')' { $$= $2; } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/table.cc mysql-8.0.3-rc/sql/table.cc --- mysql-8.0.3-rc/sql/table.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/table.cc 2020-05-03 10:55:37.884761843 +0800 @@ -4355,6 +4355,9 @@ void TABLE::init(THD *thd, TABLE_LIST *t bool error MY_ATTRIBUTE((unused))= refix_gc_items(thd); DBUG_ASSERT(!error); + + hist_tbl= tl->hist_tbl; + orig_tbl= tl->orig_tbl; /* Flashback: take both flashback flags from the TABLE_LIST */ } @@ -8373,3 +8376,58 @@ void TABLE::blobs_need_not_keep_old_valu } } ////////////////////////////////////////////////////////////////////////// + +time_t time_to_epoch(const struct tm *ltm, int utcdiff) { /* Converts the broken-down time ltm to seconds since 1970-01-01, shifting the hour by utcdiff hours. Leap days are counted with a plain every-fourth-year rule. */ + const int mon_days []= + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + long tyears, tdays, leaps, utc_hrs; + int i; + + tyears= ltm->tm_year - 70; // tm->tm_year is from 1900. + leaps= (tyears + 2) / 4; + + if (!((tyears + 2) % 4) && ltm->tm_mon < 2) + leaps-= 1; + + tdays= 0; + for (i= 0; i < ltm->tm_mon; i++) tdays+= mon_days[i]; + tdays+= ltm->tm_mday - 1; // days of month passed. + tdays= tdays + (tyears * 365) + leaps; + + utc_hrs= ltm->tm_hour - utcdiff; // for your time zone.
+ return (tdays * 86400) + (utc_hrs * 3600) + (ltm->tm_min * 60) + ltm->tm_sec; +} + +/* Flashback: parse lexstr as a DATETIME literal and convert it to seconds since the epoch, treating it as server local time; returns 0 when no literal item could be created */ +time_t Temporal_hint::lexstr_to_unix_time(THD *thd, const LEX_STRING &lexstr) +{ + time_t ts= 0; + + // For datetime type check + Item *tmp_item= create_temporal_literal(thd, static_cast<const char *>(lexstr.str), + lexstr.length, system_charset_info, + MYSQL_TYPE_DATETIME, true); + if (tmp_item) + { + MYSQL_TIME mysql_time; + (void)tmp_item->get_date(&mysql_time, 0); + struct tm tm= { .tm_sec= (int)mysql_time.second, + .tm_min= (int)mysql_time.minute, + .tm_hour= (int)mysql_time.hour, + .tm_mday= (int)mysql_time.day, + // Align definition of struct tm + .tm_mon= (int)mysql_time.month - 1, + .tm_year= (int)mysql_time.year - 1900 }; + + time_t time_utc= 0; + struct tm tm_local= {}; + int time_zone= 0; + + localtime_r(&time_utc, &tm_local); /* localtime_r instead of localtime: no shared static buffer, so concurrent sessions cannot clobber each other */ + time_zone= (tm_local.tm_hour > 12) ? (tm_local.tm_hour - 24) : tm_local.tm_hour; /* local hour at the epoch, folded into the range -11..12, is used as the UTC offset in whole hours */ + + ts= time_to_epoch(&tm, time_zone); + } + + return ts; +} diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/sql/table.h mysql-8.0.3-rc/sql/table.h --- mysql-8.0.3-rc/sql/table.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/sql/table.h 2020-04-19 23:52:39.614543834 +0800 @@ -106,6 +106,8 @@ typedef int8 plan_idx; class Opt_hints_qb; class Opt_hints_table; class SELECT_LEX; +/* Flashback */ +class Temporal_hint; namespace dd { class Table; @@ -934,6 +936,9 @@ struct TABLE_SHARE TABLE_SHARE_FOREIGN_KEY_INFO *foreign_key; uint foreign_key_parents; TABLE_SHARE_FOREIGN_KEY_PARENT_INFO *foreign_key_parent; + + /* Flashback: when set, InnoDB creates the GEN_TRX_ID_INDEX index for this table */ + bool need_trx_id_index; /** Set share's table cache key and update its db and table name appropriately. @@ -2045,6 +2050,10 @@ public: to false for all such fields in this table.
*/ void blobs_need_not_keep_old_value(); + +public: + bool hist_tbl; + bool orig_tbl; /* Flashback: both flags are copied from the TABLE_LIST in TABLE::init */ }; @@ -2362,6 +2371,12 @@ struct TABLE_LIST MDL_key::TABLE, db, table_name, mdl_type_for_dml(m_lock_descriptor.type), MDL_TRANSACTION); + + /* Flashback: start with no flashback role and no temporal hint */ + hist_tbl= false; + orig_tbl= false; + temporal_hint= NULL; + attached_history= false; } @@ -3310,6 +3325,13 @@ private: enum enum_table_ref_type m_table_ref_type; /** See comments for TABLE_SHARE::get_table_ref_version() */ ulonglong m_table_ref_version; + +public: + bool hist_tbl; + bool orig_tbl; + Temporal_hint *temporal_hint; + /** TABLE_LIST added in make_his_table_list */ + bool attached_history; }; @@ -3988,4 +4010,35 @@ bool create_table_share_for_upgrade(THD bool is_fix_view_cols_and_deps); ////////////////////////////////////////////////////////////////////////// +/* Flashback: kind of temporal hint; HINT_TYPE_END is the value a new Temporal_hint starts with */ +enum Temporal_hint_type {AS_OF_HINT, FROM_TO_HINT, TRX_ID_HINT, HINT_TYPE_END}; + +typedef union Temporal_hint_value +{ + time_t as_of_value; //AS OF as_of_value + time_t from_to_value[2]; //FROM from_to_value[0] TO from_to_value[1] + ulonglong trx_id_value; //TRANSACTION trx_id_value +} Temporal_hint_value; + +class Temporal_hint : public Sql_alloc +{ +public: + Temporal_hint(bool only_history= false) + : type(HINT_TYPE_END), + only_history(only_history), + table(nullptr) + { memset(&value, 0, sizeof(value)); } + + ~Temporal_hint() {} + + static time_t lexstr_to_unix_time(THD *thd, const LEX_STRING &str); + + enum Temporal_hint_type type{HINT_TYPE_END}; + Temporal_hint_value value; + + bool only_history{false}; + + TABLE *table{nullptr}; //For table.file.row_prebuilt +}; + #endif /* TABLE_INCLUDED */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/btr/btr0cur.cc mysql-8.0.3-rc/storage/innobase/btr/btr0cur.cc --- mysql-8.0.3-rc/storage/innobase/btr/btr0cur.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/btr/btr0cur.cc 2020-05-03 10:57:34.576757356 +0800 @@
-2905,6 +2905,14 @@ btr_cur_ins_lock_and_undo( return(err); } + /* Flashback: history tables return here, + so no undo is logged and roll_ptr is not updated + for restoring a previous version. */ + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) { + + return(DB_SUCCESS); + } + err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP, thr, index, entry, NULL, 0, NULL, NULL, @@ -4776,12 +4784,15 @@ btr_cur_del_mark_set_clust_rec( return(err); } - err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, - index, entry, NULL, 0, rec, offsets, - &roll_ptr); - if (err != DB_SUCCESS) { + /* Flashback: delete-marking in a history table logs no undo. NOTE(review): roll_ptr is then not assigned by this call - confirm it is initialised before it is used further down. */ + if (!DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) { + err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, + index, entry, NULL, 0, rec, offsets, + &roll_ptr); + if (err != DB_SUCCESS) { - return(err); + return(err); + } } /* The search latch is not needed here, because @@ -4816,8 +4827,12 @@ btr_cur_del_mark_set_clust_rec( row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr); - btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id, - roll_ptr, mtr); + /* Flashback: delete-marking in a history table writes no redo record. NOTE(review): confirm this is safe for crash recovery. */ + if (!DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) + { + btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id, + roll_ptr, mtr); + } return(err); } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/CMakeLists.txt mysql-8.0.3-rc/storage/innobase/CMakeLists.txt --- mysql-8.0.3-rc/storage/innobase/CMakeLists.txt 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/CMakeLists.txt 2020-04-19 23:52:38.838543864 +0800 @@ -169,7 +169,9 @@ SET(INNOBASE_SOURCES ut/ut0rnd.cc ut/ut0ut.cc ut/ut0vec.cc - ut/ut0wqueue.cc) + ut/ut0wqueue.cc + tlog/tlog0tlog.cc + tlog/tlog0lru.cc) IF(WITH_INNODB) # Legacy option diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/dict/dict0boot.cc
mysql-8.0.3-rc/storage/innobase/dict/dict0boot.cc --- mysql-8.0.3-rc/storage/innobase/dict/dict0boot.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/dict/dict0boot.cc 2020-04-19 23:52:38.834543864 +0800 @@ -258,19 +258,19 @@ dict_boot(void) /* Be sure these constants do not ever change. To avoid bloat, only check the *NUM_FIELDS* in each table */ ut_ad(DICT_NUM_COLS__SYS_TABLES == 8); - ut_ad(DICT_NUM_FIELDS__SYS_TABLES == 10); + ut_ad(DICT_NUM_FIELDS__SYS_TABLES == 11); ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2); ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7); - ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9); + ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 10); ut_ad(DICT_NUM_COLS__SYS_INDEXES == 8); - ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 10); + ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 11); ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3); - ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5); + ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 6); ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4); - ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN == 6); + ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN == 7); ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2); ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4); - ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6); + ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 7); heap = mem_heap_create(450); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/dict/dict0dd.cc mysql-8.0.3-rc/storage/innobase/dict/dict0dd.cc --- mysql-8.0.3-rc/storage/innobase/dict/dict0dd.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/dict/dict0dd.cc 2020-05-03 16:05:26.024573093 +0800 @@ -2004,6 +2004,10 @@ dd_copy_from_table_share( } } + /* Flashback: step past one more index, expected to be GEN_TRX_ID_INDEX */ + if (index != nullptr) + index = index->next(); + #ifdef UNIV_DEBUG if (index != nullptr) { ut_ad(table_share->keys == 0); @@ -2073,6 +2077,22 @@ dd_fill_dict_index( } } + /* Flashback: when the table share asks for it, create the GEN_TRX_ID_INDEX index on DB_TRX_ID */ + if (m_form->s->need_trx_id_index) { + dict_index_t* index =
dict_mem_index_create( + m_table->name.m_name, "GEN_TRX_ID_INDEX", + 0, 0, 0); + + index->index_trx_id = true; + + dberr_t new_err = dict_index_add_to_cache( + m_table, index, index->page, FALSE); + if (new_err != DB_SUCCESS) { + error = HA_ERR_GENERIC; + goto dd_error; + } + } + if (dict_table_has_fts_index(m_table)) { ut_ad(DICT_TF2_FLAG_IS_SET(m_table, DICT_TF2_FTS)); } @@ -2514,6 +2534,11 @@ dd_fill_dict_table( mem_heap_free(heap); + if (dd_tab->get_hist_tbl()) + DICT_TF2_FLAG_SET(m_table, DICT_TF2_HIST_TABLE); + if (dd_tab->get_orig_tbl()) + DICT_TF2_FLAG_SET(m_table, DICT_TF2_ORIG_TABLE); + return(m_table); } @@ -3684,6 +3709,12 @@ dd_open_table_one( mem_heap_t* heap = mem_heap_create(1000); bool fail = false; + /* Flashback: values of the most recently processed index, used after the loop for GEN_TRX_ID_INDEX */ + uint64 id_now = 0; + uint32 root_now = 0; + uint32 sid_now = 0; + uint64 trx_id_now = 0; + /* Now fill the space ID and Root page number for each index */ dict_index_t* index = m_table->first_index(); for (const auto dd_index : dd_table->indexes()) { @@ -3760,6 +3791,20 @@ dd_open_table_one( index->rtr_srs.reset(fetch_srs(index->srid)); index = index->next(); + + /* Flashback: remember where this index lives */ + root_now = root; + sid_now = sid; + id_now = id; + trx_id_now = trx_id; + } + + /* Flashback: fill the space ID and Root page number for GEN_TRX_ID_INDEX. NOTE(review): this assumes its root page and index id directly follow those of the last index - confirm. */ + if (index != nullptr) { + index->page = root_now + 1; + index->space = sid_now; + index->id = id_now + 1; + index->trx_id = trx_id_now; } if (!implicit) { diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/dict/dict0dict.cc mysql-8.0.3-rc/storage/innobase/dict/dict0dict.cc --- mysql-8.0.3-rc/storage/innobase/dict/dict0dict.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/dict/dict0dict.cc 2020-05-03 16:07:10.956569057 +0800 @@ -1248,10 +1248,17 @@ dict_table_add_system_columns( #error "DATA_ROLL_PTR != 2" #endif + dict_mem_table_add_col(table, heap, "DB_END_TRX_ID", DATA_SYS, + DATA_END_TRX_ID | DATA_NOT_NULL, +
DATA_END_TRX_ID_LEN); +#if DATA_END_TRX_ID != 3 +#error "DATA_END_TRX_ID != 3" +#endif + /* This check reminds that if a new system column is added to the program, it should be dealt with here */ -#if DATA_N_SYS_COLS != 3 -#error "DATA_N_SYS_COLS != 3" +#if DATA_N_SYS_COLS != 4 +#error "DATA_N_SYS_COLS != 4" #endif } } @@ -2205,12 +2212,12 @@ dict_col_name_is_reserved( { /* This check reminds that if a new system column is added to the program, it should be dealt with here. */ -#if DATA_N_SYS_COLS != 3 -#error "DATA_N_SYS_COLS != 3" +#if DATA_N_SYS_COLS != 4 +#error "DATA_N_SYS_COLS != 4" #endif static const char* reserved_names[] = { - "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR" + "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR", "DB_END_TRX_ID" }; ulint i; @@ -3157,6 +3164,9 @@ dict_index_build_internal_clust( #if DATA_ROLL_PTR != 2 # error "DATA_ROLL_PTR != 2" #endif +#if DATA_END_TRX_ID != 3 +# error "DATA_END_TRX_ID != 3" +#endif if (!dict_index_is_unique(index)) { dict_index_add_col(new_index, table, @@ -3167,6 +3177,8 @@ dict_index_build_internal_clust( dict_index_add_col( new_index, table, table->get_sys_col(DATA_TRX_ID), 0, true); + if (table->flags2 & DICT_TF2_HIST_TABLE) + new_index->n_uniq++; /* Flashback: history tables also count DB_TRX_ID in n_uniq */ for (i = 0; i < trx_id_pos; i++) { @@ -3211,6 +3223,13 @@ dict_index_build_internal_clust( dict_index_add_col( new_index, table, table->get_sys_col(DATA_ROLL_PTR), 0, true); + + dict_index_add_col( + new_index, table, + table->get_sys_col(DATA_END_TRX_ID), 0, true); + + if (table->flags2 & DICT_TF2_HIST_TABLE) + new_index->n_uniq += 2; /* Flashback: history tables count two more columns in n_uniq - presumably DB_ROLL_PTR and DB_END_TRX_ID, TODO confirm */ } /* Remember the table columns already contained in new_index */ @@ -3296,6 +3315,14 @@ dict_index_build_internal_non_clust( new_index->id = index->id; + /* Flashback: the DB_TRX_ID index gets DB_TRX_ID as its single user-defined column */ + if (index->index_trx_id) { + new_index->n_user_defined_cols = 1; + dict_index_add_col( + new_index, table, + table->get_sys_col(DATA_TRX_ID), 0, true); + } + /* Copy fields from index to new_index */ dict_index_copy(new_index, index, table, 0, index->n_fields); @@ -6921,6
+6948,10 @@ DDTableBuffer::create_tuples() col = m_index->table->get_sys_col(DATA_ROLL_PTR); dfield = dtuple_get_nth_field(m_replace_tuple, dict_col_get_no(col)); dfield_set_data(dfield, sys_buf, DATA_ROLL_PTR_LEN); + + col = m_index->table->get_sys_col(DATA_END_TRX_ID); + dfield = dtuple_get_nth_field(m_replace_tuple, dict_col_get_no(col)); + dfield_set_data(dfield, sys_buf, DATA_END_TRX_ID_LEN); /* Flashback: DB_END_TRX_ID points at sys_buf like DB_ROLL_PTR above */ } /** Initialize the in-memory index */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/dict/dict0load.cc mysql-8.0.3-rc/storage/innobase/dict/dict0load.cc --- mysql-8.0.3-rc/storage/innobase/dict/dict0load.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/dict/dict0load.cc 2020-04-19 23:52:38.834543864 +0800 @@ -423,6 +423,11 @@ err_len: if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { goto err_len; } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_INDEXES__DB_END_TRX_ID, &len); /* Flashback: the new system field must have its fixed length or be NULL */ + if (len != DATA_END_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } field = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__NAME, &name_len); @@ -582,6 +587,11 @@ err_len: rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &len); if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_COLUMNS__DB_END_TRX_ID, &len); + if (len != DATA_END_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; } field = rec_get_nth_field_old( @@ -761,6 +771,12 @@ err_len: goto err_len; } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_VIRTUAL__DB_END_TRX_ID, &len); + if (len != DATA_END_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + if (column != NULL) { *column = table->get_col(base); } @@ -984,6 +1000,11 @@ err_len: if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { goto err_len; } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FIELDS__DB_END_TRX_ID, &len); + if (len != DATA_END_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + }
field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len); @@ -1057,6 +1078,12 @@ err_len: goto err_len; } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLESPACES__DB_END_TRX_ID, &len); + if (len != DATA_END_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLESPACES__NAME, &len); if (len == 0 || len == UNIV_SQL_NULL) { @@ -1291,6 +1318,11 @@ err_len: if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { goto err_len; } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__DB_END_TRX_ID, &len); + if (len != DATA_END_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len); if (len != 8) { diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.cc mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.cc --- mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.cc 2020-05-03 16:17:44.480544695 +0800 @@ -3013,7 +3013,8 @@ ha_innobase::ha_innobase( ), m_start_of_scan(), m_stored_select_lock_type(LOCK_NONE_UNSET), - m_mysql_has_locked() + m_mysql_has_locked(), + m_end_of_orig_scan(), m_start_of_hist_scan() /* Flashback: both scan-state flags start out false; general_fetch reads m_start_of_hist_scan before index_read may have set it */ {} /*********************************************************************//** @@ -9460,6 +9461,10 @@ ha_innobase::index_read( if (m_prebuilt->sql_stat_start) { build_template(false); + /* Flashback: look up the temporal hint for this table at statement start */ + m_prebuilt->t_hint = + thd_get_table_temporal_hint(m_prebuilt->trx->mysql_thd, + m_prebuilt->table->name.m_name); } if (key_ptr != NULL) { @@ -9502,7 +9507,10 @@ ha_innobase::index_read( if (mode != PAGE_CUR_UNSUPP) { - innobase_srv_conc_enter_innodb(m_prebuilt); + /* Flashback + Do not call this for selecting history table */ + if (!m_end_of_orig_scan) + innobase_srv_conc_enter_innodb(m_prebuilt); if (!m_prebuilt->table->is_intrinsic()) { @@ -9517,8 +9525,46 @@ ha_innobase::index_read(
m_prebuilt->ins_sel_stmt = thd_is_ins_sel_stmt( m_user_thd); - ret = row_search_mvcc( - buf, mode, m_prebuilt, match_mode, 0); + if ((m_prebuilt->table->flags2 & DICT_TF2_ORIG_TABLE) && m_prebuilt->t_hint) { + + if (m_prebuilt->t_hint->only_history) { + m_end_of_orig_scan = true; /* only_history: skip the original table */ + } + + if (m_end_of_orig_scan) { + dtuple_t* orig_clust_ref = nullptr; + dict_index_t* orig_index = nullptr; + dict_table_t* his_table = nullptr; + MDL_ticket* mdl = nullptr; + + open_his_dict_table(his_table, mdl); + + if (prepare_prebuilt_before_his_scan(his_table, + orig_clust_ref, orig_index)) { + + ret = row_search_mvcc( + buf, mode, m_prebuilt, match_mode, 0); + + reset_prebuilt_after_his_scan(orig_clust_ref, orig_index); + + m_start_of_hist_scan = true; /* the first read of the history table was issued */ + } else { + ret = DB_RECORD_NOT_FOUND; + } + + close_his_dict_table(his_table, mdl); + + } else { + + ret = row_search_mvcc( + buf, mode, m_prebuilt, match_mode, 0); + } + + } else { + + ret = row_search_mvcc( + buf, mode, m_prebuilt, match_mode, 0); + } } else { m_prebuilt->session = thd_to_innodb_session(m_user_thd); @@ -9545,11 +9591,25 @@ ha_innobase::index_read( break; case DB_RECORD_NOT_FOUND: - error = HA_ERR_KEY_NOT_FOUND; - break; - case DB_END_OF_INDEX: - error = HA_ERR_KEY_NOT_FOUND; + if ((m_prebuilt->table->flags2 & DICT_TF2_ORIG_TABLE) && m_prebuilt->t_hint) { + if (!m_end_of_orig_scan) { + if (m_prebuilt->index->last_sel_cur) { + m_prebuilt->index->last_sel_cur->release(); + } + m_end_of_orig_scan = true; + m_start_of_scan = true; + error = HA_END_OF_ORIG_SCAN; /* original table exhausted; the history table is read next */ + } else { + m_prebuilt->t_hint = NULL; + m_end_of_orig_scan = false; + m_start_of_hist_scan = false; + error = HA_ERR_KEY_NOT_FOUND; + } + + } else { + error = HA_ERR_KEY_NOT_FOUND; + } break; case DB_TABLESPACE_DELETED: @@ -9810,11 +9870,47 @@ ha_innobase::general_fetch( dberr_t ret; if (!intrinsic) { + /* Flashback: an original table read with a temporal hint may continue into its history table */ + if ((m_prebuilt->table->flags2 & DICT_TF2_ORIG_TABLE) && m_prebuilt->t_hint) { - ret = row_search_mvcc( - buf, PAGE_CUR_UNSUPP, m_prebuilt,
match_mode, - direction); + if (m_prebuilt->t_hint->only_history) { + m_end_of_orig_scan = true; + } + + if (m_end_of_orig_scan) { + if (!m_start_of_hist_scan) { + int error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY); + DBUG_RETURN(error); + } else { + dtuple_t* orig_clust_ref = nullptr; + dict_index_t* orig_index = nullptr; + dict_table_t* his_table = nullptr; + MDL_ticket* mdl = nullptr; + + open_his_dict_table(his_table, mdl); + + if (prepare_prebuilt_before_his_scan(his_table, + orig_clust_ref, orig_index)) { + + ret = row_search_mvcc( + buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, + direction); + + reset_prebuilt_after_his_scan(orig_clust_ref, orig_index); + } + close_his_dict_table(his_table, mdl); + } + } else { + ret = row_search_mvcc( + buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, + direction); + } + } else { + ret = row_search_mvcc( + buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, + direction); + } } else { ret = row_search_no_mvcc( buf, PAGE_CUR_UNSUPP, m_prebuilt, match_mode, @@ -9831,10 +9927,27 @@ ha_innobase::general_fetch( srv_stats.n_rows_read.add(thd_get_thread_id(trx->mysql_thd), 1); break; case DB_RECORD_NOT_FOUND: - error = HA_ERR_END_OF_FILE; - break; case DB_END_OF_INDEX: - error = HA_ERR_END_OF_FILE; + if ((m_prebuilt->table->flags2 & DICT_TF2_ORIG_TABLE) && m_prebuilt->t_hint) { + if (!m_end_of_orig_scan) { + if (m_prebuilt->index->last_sel_cur) { + m_prebuilt->index->last_sel_cur->release(); + } + m_end_of_orig_scan = true; + m_start_of_scan = true; + error = HA_END_OF_ORIG_SCAN; + } else { + m_prebuilt->t_hint = NULL; + m_end_of_orig_scan = false; + m_start_of_hist_scan = false; + error = HA_ERR_END_OF_FILE; + } + + } else { + m_end_of_orig_scan = false; + m_start_of_hist_scan = false; + error = HA_ERR_END_OF_FILE; + } break; case DB_TABLESPACE_DELETED: ib_senderrf( @@ -11113,6 +11226,9 @@ create_index( /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */ ut_a(innobase_strcasecmp(key->name, 
innobase_index_reserve_name) != 0); + /* Flashback: Assert that "GEN_TRX_ID_INDEX" cannot be used as user defined index */ + ut_a(innobase_strcasecmp(key->name, innobase_index_trx_id) != 0); + ind_type = 0; if (key->flags & HA_SPATIAL) { ind_type = DICT_SPATIAL; @@ -11354,6 +11470,50 @@ create_clustered_index_when_no_primary( return(convert_error_code_to_mysql(error, flags, NULL)); } +/*****************************************************************//** +Flashback +Create the index of an InnoDB history table on system column DB_TRX_ID; returns the MySQL error code converted from the InnoDB result*/ +inline +int +create_trx_id_index( +/*===================================*/ + trx_t* trx, /*!< in: InnoDB transaction handle */ + ulint flags, /*!< in: InnoDB table flags */ + const char* table_name) /*!< in: table name */ +{ + dict_index_t* index; + dberr_t error; + + /* We pass 0 as the space id, and determine at a lower level the space + id where to store the table */ + index = dict_mem_index_create(table_name, + innobase_index_trx_id, + 0, 0, 0); + index->index_trx_id = true; /* marks this as the DB_TRX_ID index */ + innodb_session_t*& priv = thd_to_innodb_session(trx->mysql_thd); + + dict_table_t* handler = priv->lookup_table_handler(table_name); + + if (handler != NULL) { + /* Disable use of AHI for intrinsic table indexes as AHI + validates the predicated entry using index-id which has to be + system-wide unique that is not the case with indexes of + intrinsic table for performance reason. + Also given the lifetime of these tables and frequent delete + and update AHI would not help on performance front as it does + with normal tables. */ + index->disable_ahi = true; + } + + error = row_create_index_for_mysql(index, trx, NULL, handler); + + if (error != DB_SUCCESS && handler != NULL) { + priv->unregister_table_handler(table_name); + } + + return(convert_error_code_to_mysql(error, flags, NULL)); +} + /** Validate DATA DIRECTORY option. @return true if valid, false if not.
*/ bool @@ -12389,6 +12549,11 @@ index_bad: m_flags2 |= DICT_TF2_USE_FILE_PER_TABLE; } + if (m_create_info->options & HA_LEX_CREATING_FLASHBACK_TABLE) + m_flags2 |= DICT_TF2_HIST_TABLE; + else if (m_create_info->options & HA_LEX_CREATE_FLASHBACK_TABLE) + m_flags2 |= DICT_TF2_ORIG_TABLE; /* Flashback: the CREATING option marks a history table, the CREATE option an original table */ + /* Set the table flags */ dict_tf_set(&m_flags, innodb_row_format, zip_ssize, m_use_data_dir, m_use_shared_space); @@ -12912,6 +13077,16 @@ create_table_info_t::create_table( } } + /* Flashback: history tables additionally get the index on DB_TRX_ID */ + if (m_create_info->options & HA_LEX_CREATING_FLASHBACK_TABLE) { + error = create_trx_id_index( + m_trx, m_flags, m_table_name); + if (error) { + DBUG_RETURN(error); + } + } + + initialize_autoinc(); /* Cache all the FTS indexes on this table in the FTS specific @@ -13824,8 +13999,16 @@ ha_innobase::get_extra_columns_and_keys( DBUG_RETURN(ER_WRONG_COLUMN_NAME); } + dd::Column* db_end_trx_id = dd_add_hidden_column( + dd_table, "DB_END_TRX_ID", DATA_END_TRX_ID_LEN, + dd::enum_column_types::INT24); + if (db_end_trx_id == nullptr) { + DBUG_RETURN(ER_WRONG_COLUMN_NAME); + } + dd_add_hidden_element(primary, db_trx_id); dd_add_hidden_element(primary, db_roll_ptr); + dd_add_hidden_element(primary, db_end_trx_id); /* Add all non-virtual columns to the clustered index, unless they already part of the PRIMARY KEY. */ @@ -15728,6 +15911,12 @@ ha_innobase::info_low( if (!index->is_committed()) { num_innodb_index--; } + + /* Flashback: do not count the + GEN_TRX_ID_INDEX index here. */ + if (!strcmp(index->name(), innobase_index_trx_id)) { + num_innodb_index--; + } } if (table->s->keys < num_innodb_index @@ -20539,6 +20728,24 @@ innobase_index_name_is_reserved( return(true); } + /* Flashback: the name of the DB_TRX_ID index is reserved as well */ + if (innobase_strcasecmp(key->name, + innobase_index_trx_id) == 0) { + /* Push warning to mysql */ + push_warning_printf(thd, + Sql_condition::SL_WARNING, + ER_WRONG_NAME_FOR_INDEX, + "Cannot Create Index with name" + " '%s'.
The name is reserved" + " for the system default column" + " index.", + innobase_index_trx_id); + + my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), + innobase_index_trx_id); + + return(true); + } } return(false); @@ -22612,6 +22819,119 @@ bool ha_innobase::is_record_buffer_wante return true; } +/* Flashback: TRXTOTIME reads the tlog entries for the transaction id window [trxortime - lower, trxortime + upper] into *ans; always returns 0. NOTE(review): the element type of the std::vector parameters is missing in this copy of the patch. */ +int +ha_innobase::trxtotime( + THD* thd, + std::vector* ans) +{ + trx_id_t left_trx = (thd->lex->convert_info->trxortime > thd->lex->convert_info->lower) ? thd->lex->convert_info->trxortime - thd->lex->convert_info->lower : 0; /* clamp at 0 so the lower bound cannot wrap around */ + trx_id_t right_trx = thd->lex->convert_info->trxortime + thd->lex->convert_info->upper; + *ans = read_tlog_by_trx_ids(left_trx, right_trx); + return 0; +} + +int +ha_innobase::timetotrx( + THD* thd, + std::vector* ans) +{ + struct timeval tv1; + struct timeval tv2; + tv1.tv_sec = (thd->lex->convert_info->trxortime > thd->lex->convert_info->lower) ? thd->lex->convert_info->trxortime - thd->lex->convert_info->lower : 0; /* clamp at the epoch so the lower bound cannot wrap around */ + tv1.tv_usec = 0; + tv2.tv_sec = thd->lex->convert_info->trxortime + thd->lex->convert_info->upper; + tv2.tv_usec = 0; + *ans = read_tlog_finish_between(tv1, tv2, trx_sys_get_max_trx_id()); + return 0; +} + +int +ha_innobase::currenttrx( + THD* thd, + ulonglong* current_trx_id) +{ + *current_trx_id = trx_sys_get_max_trx_id(); + return 0; +} + +/* Flashback */ +/** Prepare prebuilt before scanning history table. */ +bool ha_innobase::prepare_prebuilt_before_his_scan( + dict_table_t* &his_table, + dtuple_t* &orig_clust_ref MY_ATTRIBUTE((unused)), + dict_index_t* &orig_index MY_ATTRIBUTE((unused))) +{ + DBUG_ENTER("prepare_prebuilt_before_his_scan"); + + bool res = false; + + TABLE* t_hint_table = m_prebuilt->t_hint->table; + + if (his_table != NULL && t_hint_table != NULL) { + /* t_hint_table must be of InnoDB engine, or t_hint is null. */ + row_prebuilt_t* his_prebuilt = + static_cast<ha_innobase*>(t_hint_table->file)->get_prebuilt(); + + /* Assign index of the history prebuilt, + as the same as the index of original prebuilt.
*/ + for (dict_index_t* his_index = + his_prebuilt->table->first_index(); + his_index; + his_index = his_index->next()) { + if (!strcmp(his_index->name, m_prebuilt->index->name)) { + his_prebuilt->index = his_index; + break; + } + } + + /* Keep clust_ref and index of original prebuilt. */ + orig_clust_ref = m_prebuilt->clust_ref; + orig_index = m_prebuilt->index; + + /* Assign clust_ref and index of history prebuilt + to original prebuilt. */ + m_prebuilt->clust_ref = his_prebuilt->clust_ref; + m_prebuilt->index = his_prebuilt->index; + + res = true; + } + + DBUG_RETURN(res); +} + +/** Reset prebuilt after history table is scanned */ +void ha_innobase::reset_prebuilt_after_his_scan( + dtuple_t* &orig_clust_ref, + dict_index_t* &orig_index) +{ + m_prebuilt->clust_ref = orig_clust_ref; + m_prebuilt->index = orig_index; +} + +void ha_innobase::open_his_dict_table( + dict_table_t* &his_dict_table MY_ATTRIBUTE((unused)), + MDL_ticket* &mdl) +{ + std::string str = std::string(m_prebuilt->table->name.m_name) + + std::string(HISTORY_TABLE_POSTFIX); + char* his_table_name = const_cast(str.data()); + + his_dict_table = dd_table_open_on_name(m_prebuilt->trx->mysql_thd, &mdl, + his_table_name, false, DICT_ERR_IGNORE_NONE); +} + +void ha_innobase::close_his_dict_table( + dict_table_t* &his_dict_table, + MDL_ticket* &mdl) +{ + if (his_dict_table) { + dd_table_close(his_dict_table, + m_prebuilt->trx->mysql_thd, &mdl, false); + } + +} + /******************************************************************//** Use this when the args are passed to the format string from errmsg-utf8.txt directly as is. 
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.h mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.h --- mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/handler/ha_innodb.h 2020-04-19 23:52:38.830543864 +0800 @@ -29,9 +29,14 @@ this program; if not, write to the Free #include "my_inttypes.h" #include "trx0trx.h" +/* Flashback */ +#include "tlog0tlog.h" + /** "GEN_CLUST_INDEX" is the name reserved for InnoDB default system clustered index when there is no primary key. */ extern const char innobase_index_reserve_name[]; +/* Flashback */ +extern const char innobase_index_trx_id[]; /* Structure defines translation table between mysql index and InnoDB index structures */ @@ -693,6 +698,50 @@ protected: /** If mysql has locked with external_lock() */ bool m_mysql_has_locked; + + /** Flashback: this is set to 1 when the orig table was already scanned */ + bool m_end_of_orig_scan; + + /** Flashback: this is set to 1 when the history table is scanned */ + bool m_start_of_hist_scan; + +/* Flashback */ +public: + int + trxtotime( + THD* thd, + std::vector* ans); + int + timetotrx( + THD* thd, + std::vector* ans); + int + currenttrx( + THD* thd, + ulonglong* current_trx_id); + + bool + prepare_prebuilt_before_his_scan( + dict_table_t* &his_dict_table, + dtuple_t* &orig_clust_ref, + dict_index_t* &orig_index); + + void + reset_prebuilt_after_his_scan( + dtuple_t* &orig_clust_ref, + dict_index_t* &orig_index); + + void + open_his_dict_table( + dict_table_t* &his_dict_table, + MDL_ticket* &mdl); + + void + close_his_dict_table( + dict_table_t* &his_dict_table, + MDL_ticket* &mdl); + + row_prebuilt_t* get_prebuilt() const { return m_prebuilt; } }; struct trx_t; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/handler/ha_innopart.cc mysql-8.0.3-rc/storage/innobase/handler/ha_innopart.cc --- 
mysql-8.0.3-rc/storage/innobase/handler/ha_innopart.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/handler/ha_innopart.cc 2020-04-19 23:52:38.830543864 +0800 @@ -3940,6 +3940,12 @@ ha_innopart::info_low( if (!index->is_committed()) { num_innodb_index--; } + + /* Flashback: decrement if + GEN_TRX_ID_INDEX exists. */ + if (!strcmp(index->name(), innobase_index_trx_id)) { + num_innodb_index--; + } } if (table->s->keys < num_innodb_index diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/data0type.h mysql-8.0.3-rc/storage/innobase/include/data0type.h --- mysql-8.0.3-rc/storage/innobase/include/data0type.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/data0type.h 2020-05-03 16:19:07.760541492 +0800 @@ -164,7 +164,15 @@ be less than 256 */ #define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ #define DATA_ROLL_PTR_LEN 7 -#define DATA_N_SYS_COLS 3 /* number of system columns defined above */ +/* Flashback: Column DB_END_TRX_ID for recording which transaction deletes the row */ +#define DATA_END_TRX_ID 3 +#define DATA_END_TRX_ID_LEN 6 +#define END_TRX_ID_MAX_VALUE ((uint64)0xFFFFFFFFFFFF) + +#define DATA_N_EXTRA_SYS_COLS 1 /* number of extra system columns, including DB_END_TRX_ID */ + +#define DATA_N_SYS_COLS (3 + DATA_N_EXTRA_SYS_COLS) + /* number of system columns defined above */ #define DATA_ITT_N_SYS_COLS 2 /* number of system columns for intrinsic diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/data0type.ic mysql-8.0.3-rc/storage/innobase/include/data0type.ic --- mysql-8.0.3-rc/storage/innobase/include/data0type.ic 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/data0type.ic 2020-04-19 23:52:38.846543863 +0800 @@ -500,6 +500,9 @@ dtype_get_fixed_size_low( case DATA_ROLL_PTR: ut_ad(len == DATA_ROLL_PTR_LEN); break; + case DATA_END_TRX_ID: + ut_ad(len == 
DATA_END_TRX_ID_LEN); + break; default: ut_ad(0); return(0); @@ -580,6 +583,9 @@ dtype_get_min_size_low( case DATA_ROLL_PTR: ut_ad(len == DATA_ROLL_PTR_LEN); break; + case DATA_END_TRX_ID: + ut_ad(len == DATA_END_TRX_ID_LEN); + break; default: ut_ad(0); return(0); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/dict0boot.h mysql-8.0.3-rc/storage/innobase/include/dict0boot.h --- mysql-8.0.3-rc/storage/innobase/include/dict0boot.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/dict0boot.h 2020-04-19 23:52:38.850543863 +0800 @@ -173,14 +173,15 @@ enum dict_fld_sys_tables_enum { DICT_FLD__SYS_TABLES__NAME = 0, DICT_FLD__SYS_TABLES__DB_TRX_ID = 1, DICT_FLD__SYS_TABLES__DB_ROLL_PTR = 2, - DICT_FLD__SYS_TABLES__ID = 3, - DICT_FLD__SYS_TABLES__N_COLS = 4, - DICT_FLD__SYS_TABLES__TYPE = 5, - DICT_FLD__SYS_TABLES__MIX_ID = 6, - DICT_FLD__SYS_TABLES__MIX_LEN = 7, - DICT_FLD__SYS_TABLES__CLUSTER_ID = 8, - DICT_FLD__SYS_TABLES__SPACE = 9, - DICT_NUM_FIELDS__SYS_TABLES = 10 + DICT_FLD__SYS_TABLES__DB_END_TRX_ID = 3, + DICT_FLD__SYS_TABLES__ID = 4, + DICT_FLD__SYS_TABLES__N_COLS = 5, + DICT_FLD__SYS_TABLES__TYPE = 6, + DICT_FLD__SYS_TABLES__MIX_ID = 7, + DICT_FLD__SYS_TABLES__MIX_LEN = 8, + DICT_FLD__SYS_TABLES__CLUSTER_ID = 9, + DICT_FLD__SYS_TABLES__SPACE = 10, + DICT_NUM_FIELDS__SYS_TABLES = 11 }; /* The field numbers in the SYS_TABLE_IDS index */ enum dict_fld_sys_table_ids_enum { @@ -205,12 +206,13 @@ enum dict_fld_sys_columns_enum { DICT_FLD__SYS_COLUMNS__POS = 1, DICT_FLD__SYS_COLUMNS__DB_TRX_ID = 2, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR = 3, - DICT_FLD__SYS_COLUMNS__NAME = 4, - DICT_FLD__SYS_COLUMNS__MTYPE = 5, - DICT_FLD__SYS_COLUMNS__PRTYPE = 6, - DICT_FLD__SYS_COLUMNS__LEN = 7, - DICT_FLD__SYS_COLUMNS__PREC = 8, - DICT_NUM_FIELDS__SYS_COLUMNS = 9 + DICT_FLD__SYS_COLUMNS__DB_END_TRX_ID = 4, + DICT_FLD__SYS_COLUMNS__NAME = 5, + DICT_FLD__SYS_COLUMNS__MTYPE = 6, + 
DICT_FLD__SYS_COLUMNS__PRTYPE = 7, + DICT_FLD__SYS_COLUMNS__LEN = 8, + DICT_FLD__SYS_COLUMNS__PREC = 9, + DICT_NUM_FIELDS__SYS_COLUMNS = 10 }; /* The columns in SYS_INDEXES */ enum dict_col_sys_indexes_enum { @@ -230,13 +232,14 @@ enum dict_fld_sys_indexes_enum { DICT_FLD__SYS_INDEXES__ID = 1, DICT_FLD__SYS_INDEXES__DB_TRX_ID = 2, DICT_FLD__SYS_INDEXES__DB_ROLL_PTR = 3, - DICT_FLD__SYS_INDEXES__NAME = 4, - DICT_FLD__SYS_INDEXES__N_FIELDS = 5, - DICT_FLD__SYS_INDEXES__TYPE = 6, - DICT_FLD__SYS_INDEXES__SPACE = 7, - DICT_FLD__SYS_INDEXES__PAGE_NO = 8, - DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD = 9, - DICT_NUM_FIELDS__SYS_INDEXES = 10 + DICT_FLD__SYS_INDEXES__DB_END_TRX_ID = 4, + DICT_FLD__SYS_INDEXES__NAME = 5, + DICT_FLD__SYS_INDEXES__N_FIELDS = 6, + DICT_FLD__SYS_INDEXES__TYPE = 7, + DICT_FLD__SYS_INDEXES__SPACE = 8, + DICT_FLD__SYS_INDEXES__PAGE_NO = 9, + DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD = 10, + DICT_NUM_FIELDS__SYS_INDEXES = 11 }; /* The columns in SYS_FIELDS */ enum dict_col_sys_fields_enum { @@ -251,8 +254,9 @@ enum dict_fld_sys_fields_enum { DICT_FLD__SYS_FIELDS__POS = 1, DICT_FLD__SYS_FIELDS__DB_TRX_ID = 2, DICT_FLD__SYS_FIELDS__DB_ROLL_PTR = 3, - DICT_FLD__SYS_FIELDS__COL_NAME = 4, - DICT_NUM_FIELDS__SYS_FIELDS = 5 + DICT_FLD__SYS_FIELDS__DB_END_TRX_ID = 4, + DICT_FLD__SYS_FIELDS__COL_NAME = 5, + DICT_NUM_FIELDS__SYS_FIELDS = 6 }; /* The columns in SYS_FOREIGN */ enum dict_col_sys_foreign_enum { @@ -267,10 +271,11 @@ enum dict_fld_sys_foreign_enum { DICT_FLD__SYS_FOREIGN__ID = 0, DICT_FLD__SYS_FOREIGN__DB_TRX_ID = 1, DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR = 2, - DICT_FLD__SYS_FOREIGN__FOR_NAME = 3, - DICT_FLD__SYS_FOREIGN__REF_NAME = 4, - DICT_FLD__SYS_FOREIGN__N_COLS = 5, - DICT_NUM_FIELDS__SYS_FOREIGN = 6 + DICT_FLD__SYS_FOREIGN__DB_END_TRX_ID = 3, + DICT_FLD__SYS_FOREIGN__FOR_NAME = 4, + DICT_FLD__SYS_FOREIGN__REF_NAME = 5, + DICT_FLD__SYS_FOREIGN__N_COLS = 6, + DICT_NUM_FIELDS__SYS_FOREIGN = 7 }; /* The field numbers in the SYS_FOREIGN_FOR_NAME secondary 
index */ enum dict_fld_sys_foreign_for_name_enum { @@ -292,9 +297,10 @@ enum dict_fld_sys_foreign_cols_enum { DICT_FLD__SYS_FOREIGN_COLS__POS = 1, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID = 2, DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR = 3, - DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME = 4, - DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5, - DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6 + DICT_FLD__SYS_FOREIGN_COLS__DB_END_TRX_ID = 4, + DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME = 5, + DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 6, + DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 7 }; /* The columns in SYS_TABLESPACES */ enum dict_col_sys_tablespaces_enum { @@ -308,9 +314,10 @@ enum dict_fld_sys_tablespaces_enum { DICT_FLD__SYS_TABLESPACES__SPACE = 0, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID = 1, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR = 2, - DICT_FLD__SYS_TABLESPACES__NAME = 3, - DICT_FLD__SYS_TABLESPACES__FLAGS = 4, - DICT_NUM_FIELDS__SYS_TABLESPACES = 5 + DICT_FLD__SYS_TABLESPACES__DB_END_TRX_ID = 3, + DICT_FLD__SYS_TABLESPACES__NAME = 4, + DICT_FLD__SYS_TABLESPACES__FLAGS = 5, + DICT_NUM_FIELDS__SYS_TABLESPACES = 6 }; /* The columns in SYS_DATAFILES */ enum dict_col_sys_datafiles_enum { @@ -323,8 +330,9 @@ enum dict_fld_sys_datafiles_enum { DICT_FLD__SYS_DATAFILES__SPACE = 0, DICT_FLD__SYS_DATAFILES__DB_TRX_ID = 1, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR = 2, - DICT_FLD__SYS_DATAFILES__PATH = 3, - DICT_NUM_FIELDS__SYS_DATAFILES = 4 + DICT_FLD__SYS_DATAFILES__DB_END_TRX_ID = 3, + DICT_FLD__SYS_DATAFILES__PATH = 4, + DICT_NUM_FIELDS__SYS_DATAFILES = 5 }; /* The columns in SYS_VIRTUAL */ @@ -341,7 +349,8 @@ enum dict_fld_sys_virtual_enum { DICT_FLD__SYS_VIRTUAL__BASE_POS = 2, DICT_FLD__SYS_VIRTUAL__DB_TRX_ID = 3, DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR = 4, - DICT_NUM_FIELDS__SYS_VIRTUAL = 5 + DICT_FLD__SYS_VIRTUAL__DB_END_TRX_ID = 5, + DICT_NUM_FIELDS__SYS_VIRTUAL = 6 }; /* A number of the columns above occur in multiple tables. 
These are the diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/dict0dict.h mysql-8.0.3-rc/storage/innobase/include/dict0dict.h --- mysql-8.0.3-rc/storage/innobase/include/dict0dict.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/dict0dict.h 2020-04-19 23:52:38.846543863 +0800 @@ -1803,12 +1803,14 @@ private: /** Clustered index field number of mysql.innodb_dynamic_metadata.version */ - static constexpr unsigned VERSION_FIELD_NO = VERSION_COL_NO + 2; + /* Adding DATA_N_EXTRA_SYS_COLS here skips the DATA_END_TRX_ID field */ + static constexpr unsigned VERSION_FIELD_NO = VERSION_COL_NO + 2 + DATA_N_EXTRA_SYS_COLS; /** Clustered index field number of mysql.innodb_dynamic_metadata.metadata Plusing 2 here skips the DATA_TRX_ID and DATA_ROLL_PTR fields */ - static constexpr unsigned METADATA_FIELD_NO = METADATA_COL_NO + 2; + /* Adding DATA_N_EXTRA_SYS_COLS here skips the DATA_END_TRX_ID field */ + static constexpr unsigned METADATA_FIELD_NO = METADATA_COL_NO + 2 + DATA_N_EXTRA_SYS_COLS; /** Number of fields in the clustered index */ static constexpr unsigned N_FIELDS = METADATA_FIELD_NO + 1; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/dict0mem.h mysql-8.0.3-rc/storage/innobase/include/dict0mem.h --- mysql-8.0.3-rc/storage/innobase/include/dict0mem.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/dict0mem.h 2020-05-03 14:32:18.883246161 +0800 @@ -235,7 +235,7 @@ ROW_FORMAT=REDUNDANT. InnoDB engines do for unknown bits in order to protect backward incompatibility. */ /* @{ */ /** Total number of bits in table->flags2. */ -#define DICT_TF2_BITS 11 +#define DICT_TF2_BITS 13 #define DICT_TF2_UNUSED_BIT_MASK (~0U << DICT_TF2_BITS) #define DICT_TF2_BIT_MASK ~DICT_TF2_UNUSED_BIT_MASK @@ -273,6 +273,14 @@ it is not created by user and so not vis /** Table is opened by resurrected trx during crash recovery.
*/ #define DICT_TF2_RESURRECT_PREPARED 1024 + +/** Flashback +Table is a user-defined table having a corresponding history table */ +#define DICT_TF2_ORIG_TABLE 2048 + +/** Flashback +Table is a history table */ +#define DICT_TF2_HIST_TABLE 4096 /* @} */ #define DICT_TF2_FLAG_SET(table, flag) \ @@ -923,6 +931,10 @@ public: system clustered index when there is no primary key. */ const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX"; +/** Flashback: "GEN_TRX_ID_INDEX" is the name reserved for the index on the InnoDB +default system column DB_TRX_ID; it affects history tables only. */ +const char innobase_index_trx_id[] = "GEN_TRX_ID_INDEX"; + namespace dd { class Spatial_reference_system; } @@ -1082,6 +1094,9 @@ struct dict_index_t{ upper levels of the index tree */ bool fill_dd;/*!< Flag whether need to fill dd tables when it's a fulltext index. */ + bool index_trx_id; + /*!< Flashback: flag whether an index needs + to be built for DB_TRX_ID. */ /** Determine if the index has been committed to the data dictionary. @@ -2007,6 +2022,9 @@ public: to do non-locking reads on DD tables. */ bool is_dd_table; + /* Flashback */ + bool is_history_table; + /** @return the clustered index */ const dict_index_t* first_index() const { diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/lock0lock.h mysql-8.0.3-rc/storage/innobase/include/lock0lock.h --- mysql-8.0.3-rc/storage/innobase/include/lock0lock.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/lock0lock.h 2020-04-19 23:52:38.850543863 +0800 @@ -451,6 +451,20 @@ lock_clust_rec_cons_read_sees( const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ ReadView* view); /*!< in: consistent read view */ /*********************************************************************//** +Flashback +Checks that a record is seen in a flashback point read.
+@return true if sees, or false if an earlier version of the record +should be retrieved */ +bool +lock_clust_rec_flashback_point_read_sees( +/*==========================*/ + const rec_t* rec, /*!< in: user record which should be read or + passed over by a read cursor */ + dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ + ReadView* view, /*!< in: flashback read view */ + row_prebuilt_t* prebuilt);/*!< in: current prebuilt */ +/*********************************************************************//** Checks that a non-clustered index record is seen in a consistent read. NOTE that a non-clustered index page contains so little information on diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/mtr0log.h mysql-8.0.3-rc/storage/innobase/include/mtr0log.h --- mysql-8.0.3-rc/storage/innobase/include/mtr0log.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/mtr0log.h 2020-04-19 23:52:38.850543863 +0800 @@ -307,4 +307,14 @@ extra mlog buffer size for variable size #include "mtr0log.ic" +/********************************************************//** +Parses a log record of type MLOG_TLOG_EXTEND and MLOG_TLOG_RECORD. 
+@return parsed record end, NULL if not a complete record */ +byte* +mlog_parse_and_apply_log_rec_tlog( + const byte* ptr, /*!< in: buffer */ + const byte* end_ptr, /*!< in: buffer end */ + mlog_id_t* type); /*!< out: log record type */ + + #endif /* mtr0log_h */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/mtr0types.h mysql-8.0.3-rc/storage/innobase/include/mtr0types.h --- mysql-8.0.3-rc/storage/innobase/include/mtr0types.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/mtr0types.h 2020-04-19 23:52:38.846543863 +0800 @@ -235,8 +235,11 @@ enum mlog_id_t { /** create a SDI compact page */ MLOG_COMP_PAGE_CREATE_SDI = 64, + /** a tlog record */ + MLOG_TLOG_RECORD = 65, + /** biggest value (used in assertions) */ - MLOG_BIGGEST_TYPE = MLOG_COMP_PAGE_CREATE_SDI + MLOG_BIGGEST_TYPE = MLOG_TLOG_RECORD }; /* @} */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/row0ins.h mysql-8.0.3-rc/storage/innobase/include/row0ins.h --- mysql-8.0.3-rc/storage/innobase/include/row0ins.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/row0ins.h 2020-04-19 23:52:38.850543863 +0800 @@ -222,6 +222,8 @@ struct ins_node_t{ DB_DUPLICATE_KEY. Used in the case of REPLACE or INSERT ... ON DUPLICATE UPDATE. 
*/ ulint magic_n; + + byte* end_trx_id_buf; }; #define INS_NODE_MAGIC_N 15849075 diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/row0mysql.h mysql-8.0.3-rc/storage/innobase/include/row0mysql.h --- mysql-8.0.3-rc/storage/innobase/include/row0mysql.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/row0mysql.h 2020-05-03 16:20:10.520539078 +0800 @@ -904,6 +904,13 @@ struct row_prebuilt_t { @retval true if records can be prefetched @retval false if records cannot be prefetched */ bool can_prefetch_records() const; + + /** Flashback: temporal condition */ + Temporal_hint* t_hint; + + /** Flashback: The number of fetched transitional versions + in temporal query. */ + ulint n_transitional_vers_fetched; }; /** Callback for row_mysql_sys_index_iterate() */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/row0row.ic mysql-8.0.3-rc/storage/innobase/include/row0row.ic --- mysql-8.0.3-rc/storage/innobase/include/row0row.ic 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/row0row.ic 2020-04-19 23:52:38.842543864 +0800 @@ -104,6 +104,31 @@ row_get_rec_roll_ptr( return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); } +/*********************************************************************//** +Reads the end trx id field from a clustered index record. 
+@return value of the field */ +UNIV_INLINE +trx_id_t +row_get_rec_end_trx_id( +/*===============*/ + const rec_t* rec, /*!< in: record */ + const dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + ulint offset; + + ut_ad(index->is_clustered()); + ut_ad(rec_offs_validate(rec, index, offsets)); + + offset = index->trx_id_offset; + + if (!offset) { + offset = row_get_trx_id_offset(index, offsets); + } + + return(trx_read_trx_id(rec + offset + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)); +} + /*****************************************************************//** When an insert or purge to a table is performed, this function builds the entry to be inserted into or purged from an index on the table. diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/row0sel.h mysql-8.0.3-rc/storage/innobase/include/row0sel.h --- mysql-8.0.3-rc/storage/innobase/include/row0sel.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/row0sel.h 2020-05-03 14:33:26.215243572 +0800 @@ -524,6 +524,38 @@ row_search_index_stats( ulint col_offset, ulonglong* cardinality); +/** Convert a row in the Innobase format to a row in the MySQL format. +Note that the template in prebuilt may advise us to copy only a few +columns to mysql_rec, other columns are left blank. +Maybe not all columns are needed in the query. 
+@param[in] prebuilt prebuilt structure +@param[in] rec Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch +@param[in] vrow virtual columns +@param[in] rec_clust TRUE if rec is in the clustered index + instead of prebuilt->index +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets(rec) +@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index + but the prebuilt->template is in + clustered index format and it + is used only for end range comparison +@param[in/out] buf buffer for fetched row in MySQL format +@param[in/out] next_buf alias of buf, or returned by + row_sel_fetch_last_buf +@return TRUE on success, FALSE if not all columns could be retrieved */ +ibool +row_sel_flashback_cache_mysql_rec( + row_prebuilt_t* prebuilt, + const rec_t* rec, + const dtuple_t* vrow, + const dict_index_t* index, + const ulint* offsets, + byte* &buf, + byte* &next_buf); + #include "row0sel.ic" #endif diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/row0upd.ic mysql-8.0.3-rc/storage/innobase/include/row0upd.ic --- mysql-8.0.3-rc/storage/innobase/include/row0upd.ic 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/row0upd.ic 2020-04-19 23:52:38.846543863 +0800 @@ -190,6 +190,10 @@ row_upd_rec_sys_fields( roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record, can be 0 during IMPORT */ { + /* Flashback: Do not update sys fields for history table */ + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) + return; + ut_ad(index->is_clustered()); ut_ad(rec_offs_validate(rec, index, offsets)); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/row0vers.h mysql-8.0.3-rc/storage/innobase/include/row0vers.h --- mysql-8.0.3-rc/storage/innobase/include/row0vers.h 2017-09-19 19:33:50.000000000 +0800 +++ 
mysql-8.0.3-rc/storage/innobase/include/row0vers.h 2020-05-03 14:38:06.199232805 +0800 @@ -113,13 +113,114 @@ row_vers_build_for_consistent_read( *old_vers is allocated; memory for possible intermediate versions is allocated and freed locally within the function */ - rec_t** old_vers,/*!< out, own: old version, or NULL + rec_t** old_vers,/*!< out: old version, or NULL if the history is missing or the record does not exist in the view, that is, it was freshly inserted afterwards */ const dtuple_t**vrow); /*!< out: reports virtual column info if any */ /*****************************************************************//** +Flashback +Constructs the version of a clustered index record which a flashback as..of.. +read should see. We assume that the trx id stored in rec is such that +the consistent read should not see rec in its present version. +@return DB_SUCCESS or DB_MISSING_HISTORY */ +dberr_t +row_vers_build_for_flashback_point_read( +/*===============================*/ + const rec_t* rec, /*!< in: record in a clustered index; the + caller must have a latch on the page; this + latch locks the top of the stack of versions + of this records */ + mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will + also hold the latch on purge_view */ + dict_index_t* index, /*!< in: the clustered index */ + ulint** offsets,/*!< in/out: offsets returned by + rec_get_offsets(rec, index) */ + ReadView* view, /*!< in: the consistent read view */ + mem_heap_t** offset_heap,/*!< in/out: memory heap from which + the offsets are allocated */ + mem_heap_t* in_heap,/*!< in: memory heap from which the memory for + *old_vers is allocated; memory for possible + intermediate versions is allocated and freed + locally within the function */ + rec_t** old_vers,/*!< out: old version, or NULL + if the history is missing or the record + does not exist in the view, that is, + it was freshly inserted afterwards */ + const dtuple_t**vrow, /*!< out: reports virtual column info if any */ + 
row_prebuilt_t* prebuilt);/*!< in: current prebuilt */ + +/*****************************************************************//** +Flashback +Constructs versions of a clustered index record which a flashback from..to.. +read should see. We will store the subsequent versions into cache first. +@return DB_SUCCESS or DB_MISSING_HISTORY */ +dberr_t +row_vers_build_for_flashback_range_read( +/*===============================*/ + const rec_t* rec, /*!< in: record in a clustered index; the + caller must have a latch on the page; this + latch locks the top of the stack of versions + of this records */ + mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will + also hold the latch on purge_view */ + dict_index_t* index, /*!< in: the clustered index */ + ulint** offsets, /*!< in/out: offsets returned by + rec_get_offsets(rec, index) */ + ReadView* view, /*!< in: the consistent read view */ + mem_heap_t** offset_heap, /*!< in/out: memory heap from which + the offsets are allocated */ + mem_heap_t* in_heap, /*!< in: memory heap from which the memory for + *old_vers is allocated; memory for possible + intermediate versions is allocated and freed + locally within the function */ + rec_t** old_vers, /*!< out: old version, or NULL + if the history is missing or the record + does not exist in the view, that is, + it was freshly inserted afterwards */ + const dtuple_t**vrow, /*!< out: reports virtual column info if any */ + row_prebuilt_t* prebuilt, /*!< in: current prebuilt */ + byte* &mysql_rec, /*!< in/out: buffer for fetched row + in MySQL format */ + byte* &next_buf); /*!< in/out: alias of mysql_rec, or + returned by row_sel_fetch_last_buf */ + +/*****************************************************************//** +Flashback +Constructs versions of a clustered index record which a flashback trx_id +read should see. We will store the subsequent versions into cache first. 
+@return DB_SUCCESS or DB_MISSING_HISTORY */ +dberr_t +row_vers_build_for_flashback_trx_id_read( +/*===============================*/ + const rec_t* rec, /*!< in: record in a clustered index; the + caller must have a latch on the page; this + latch locks the top of the stack of versions + of this records */ + mtr_t* mtr, /*!< in: mtr holding the latch on rec */ + dict_index_t* index, /*!< in: the clustered index */ + ulint** offsets, /*!< in/out: offsets returned by + rec_get_offsets(rec, index) */ + ReadView* view, /*!< in: the consistent read view */ + mem_heap_t** offset_heap, /*!< in/out: memory heap from which + the offsets are allocated */ + mem_heap_t* in_heap, /*!< in: memory heap from which the memory for + *old_vers is allocated; memory for possible + intermediate versions is allocated and freed + locally within the function */ + rec_t** old_vers, /*!< out: old version, or NULL + if the history is missing or the record + does not exist in the view, that is, + it was freshly inserted afterwards */ + const dtuple_t**vrow, /*!< out: virtual row */ + row_prebuilt_t* prebuilt, /*!< in: current prebuilt */ + byte* &mysql_rec, /*!< in/out: buffer for fetched row + in MySQL format */ + byte* &next_buf); /*!< in/out: alias of mysql_rec, or + returned by row_sel_fetch_last_buf */ + +/*****************************************************************//** Constructs the last committed version of a clustered index record, which should be seen by a semi-consistent read. 
*/ void diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/tlog0lru.h mysql-8.0.3-rc/storage/innobase/include/tlog0lru.h --- mysql-8.0.3-rc/storage/innobase/include/tlog0lru.h 1970-01-01 08:00:00.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/tlog0lru.h 2020-05-03 16:25:52.898559911 +0800 @@ -0,0 +1,103 @@ +#ifndef TLOG_LRU_H +#define TLOG_LRU_H + +#include +#include +#include +#include +#include +#include + +#include "sync0rw.h" +#include "univ.i" + +class TlogDirtyCache; + +/** Status of page in cache. */ +enum tlog_page_status +{ + TLOG_PAGE_STATUS_EMPTY, + TLOG_PAGE_STATUS_LOADING, + TLOG_PAGE_STATUS_SAVING, + TLOG_PAGE_STATUS_VALID +}; + +class CachePage +{ +public: + const size_t k_size; + + char *const buffer; + bool dirty; + pthread_rwlock_t lock; + + CachePage(const unsigned int& page_size); + ~CachePage(); + + bool load_page_physical( + const std::string& path, + const int& byteoff); + + bool save_page_physical( + const std::string& path, + const int& byteoff); +}; + +class DynaCachePage : public CachePage +{ +public: + int pageno; + tlog_page_status status; + + DynaCachePage(const unsigned int& page_size); +}; + +typedef std::list::iterator Pageptr; + +class TlogLruCache +{ +private: + const std::string k_dir; + const std::string k_file_title; + const size_t k_struct_size; + const size_t k_page_size; + const unsigned int k_struct_per_page; + const size_t k_file_size; + const unsigned int k_page_per_file; + + pthread_rwlock_t cache_lock; + std::list lrulink; + std::unordered_map pageno2page; + + std::string get_file_path(const int pageno) const; + int get_byteoff(const int pageno) const; + unsigned int struct_id_to_pageno(unsigned int id) const; + unsigned int struct_id_to_struct_index(unsigned int id) const; + unsigned int struct_id_to_byte_index(unsigned int id) const; + void hit_page(Pageptr p); + + void switch_pageno(Pageptr p, int pageno); + Pageptr pickup_victim_page(); + Pageptr 
load_page(const int pageno); + bool load_page_physical(Pageptr p); + void save_page(Pageptr p); + bool save_page_physical(Pageptr p); + Pageptr zero_page(const int pageno); + void wait_io(const Pageptr p); + +public: + TlogLruCache( + const size_t& size, + const std::string& dir, + const std::string& file_title, + const unsigned int& struct_size, + const unsigned int& page_size, + const unsigned int& file_size); + ~TlogLruCache(); + void flush_pages(); + bool operate_struct_by_id(unsigned int id, std::function f); + bool read_struct_by_id(unsigned int id, std::function f); + void extend_page_by_struct_id(unsigned int id); +}; + +#endif diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/tlog0tlog.h mysql-8.0.3-rc/storage/innobase/include/tlog0tlog.h --- mysql-8.0.3-rc/storage/innobase/include/tlog0tlog.h 1970-01-01 08:00:00.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/tlog0tlog.h 2020-04-30 10:58:13.321841062 +0800 @@ -0,0 +1,131 @@ +#ifndef TLOG_TLOG_H +#define TLOG_TLOG_H + +#include + +#include "sql/handler.h" +#include "trx0trx.h" +#include "mtr0log.h" + +typedef uint64_t edge_t; + +class TlogLruCache; + +/** It records current offset of struct item. */ +class struct_item_offset +{ +protected: + static size_t cur_offset; +}; + + +/** It records size and offset of an item in page. */ +template +struct struct_item : public struct_item_offset +{ + typedef T type_name; + const size_t size; + const size_t offset; + + struct_item(size_t size) : size(size), offset(cur_offset) + { + cur_offset += size; + } + + struct_item(size_t size, size_t offset) : size(size), offset(offset) + { + cur_offset = offset + size; + } + + ~struct_item() {} +}; + + +/** Set item a new value in struct. 
*/ +template +void +set_to_page( + char* p, /*!< in: pointer to struct */ + const T& item, /*!< in: item in struct */ + const typename T::type_name& value, /*!< in: new value */ + size_t index = 0) /*!< in: index of item */ +{ + memcpy(p + item.offset + item.size * index, &value, item.size); +} + +/** Get value of item in struct. */ +template +typename T::type_name +get_from_page( + char* p, /*!< in: pointer to struct */ + const T& item, /*!< in: item in struct */ + size_t index = 0) /*!< index of item */ +{ + typename T::type_name res; + memset(&res, 0, sizeof(res)); + memcpy(&res, p + item.offset + item.size * index, item.size); + return res; +} + +/** Get values of item list in struct. */ +template +void +get_from_page_to_container( + char* p, /*!< in: pointer to struct */ + const T& item, /*!< in: item in struct */ + std::function push_entry, /*!< in: entry function to container*/ + size_t size) /*!< in: number of values */ +{ + for (size_t i = 0; i < size; ++i) { + /* Push element to container by entry. */ + push_entry(get_from_page(p, item, i)); + } +} + +/* Handle API */ +typedef handler::tlog_t tlog_t; +typedef handler::tlog_info tlog_info; + +constexpr edge_t time2usec(long sec, long usec); +edge_t get_cur_usec(); + +void make_tlog(); +void free_tlog(); +void flush_tlog(); + +void record_start_tlog(trx_id_t id, long sec, long usec); +void record_finish_tlog(trx_id_t id, const char* op_info, mtr_t *mtr); + +trx_id_t get_trx_id_list_by_time_point( + std::vector& trx_ids, + trx_id_t max_trx_id, + const long& time); + +tlog_t read_tlog_by_trx_id(const trx_id_t& id, const trx_id_t& max_trx_id); + +std::vector read_tlog_by_trx_ids( + const trx_id_t& left_id, + const trx_id_t& right_id); + +std::vector read_tlog_finish_between( + timeval& left_time, + timeval& right_time, + trx_id_t max_trx_id); + +/** Record transaction when its status changes. 
*/ +void +record_tlog_low( + const trx_id_t& id, /*!< in: transaction id */ + const tlog_trx_status& status, /*!< in: transaction status */ + const edge_t& usec); /*!< in: when transaction status changes */ + +/** Write the redo log for recording tlog. */ +void +record_tlog_write_log( + const trx_id_t& id, /*!< in: transaction id */ + const tlog_trx_status& status, /*!< in: transaction status */ + const edge_t& usec, /*!< in: when transaction status changes */ + mtr_t* mtr); /*!< in/out: mini-transaction */ + +#endif + diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/trx0rec.h mysql-8.0.3-rc/storage/innobase/include/trx0rec.h --- mysql-8.0.3-rc/storage/innobase/include/trx0rec.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/trx0rec.h 2020-04-19 23:52:38.842543864 +0800 @@ -140,6 +140,7 @@ trx_undo_update_rec_get_sys_cols( general parameters */ trx_id_t* trx_id, /*!< out: trx id */ roll_ptr_t* roll_ptr, /*!< out: roll ptr */ + trx_id_t* end_trx_id, /*!< out: end trx id */ ulint* info_bits); /*!< out: info bits state */ /*******************************************************************//** Builds an update vector based on a remaining part of an undo log record. 
@@ -161,6 +162,7 @@ trx_undo_update_rec_get_update( only trx id and roll ptr fields are added to the update vector */ trx_id_t trx_id, /*!< in: transaction id from this undorecord */ + trx_id_t end_trx_id,/*!< in: transaction id that creates this undo record (flashback) */ roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */ ulint info_bits,/*!< in: info bits from this undo record */ trx_t* trx, /*!< in: transaction */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/trx0sys.h mysql-8.0.3-rc/storage/innobase/include/trx0sys.h --- mysql-8.0.3-rc/storage/innobase/include/trx0sys.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/trx0sys.h 2020-04-19 23:52:38.850543863 +0800 @@ -158,6 +158,13 @@ Allocates a new transaction id. UNIV_INLINE trx_id_t trx_sys_get_new_trx_id(); + +/*****************************************************************//** +Allocates a new transaction id and assign id to trx->id.*/ +UNIV_INLINE +void +trx_sys_get_new_trx_id(trx_t* trx); + /*===================*/ /*****************************************************************//** Determines the maximum transaction id. diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/trx0sys.ic mysql-8.0.3-rc/storage/innobase/include/trx0sys.ic --- mysql-8.0.3-rc/storage/innobase/include/trx0sys.ic 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/include/trx0sys.ic 2020-04-26 09:00:41.450925830 +0800 @@ -25,6 +25,7 @@ Created 3/26/1996 Heikki Tuuri #include "trx0trx.h" #include "data0type.h" +#include "tlog0tlog.h" #ifndef UNIV_HOTBACKUP # include "srv0srv.h" # include "mtr0log.h" @@ -400,6 +401,21 @@ trx_sys_get_new_trx_id() return(trx_sys->max_trx_id++); } + +/*****************************************************************//** +Allocates a new transaction id and assign id to trx->id. 
+Records the transaction start in the tlog unless trx->purge_restore_trx is set.*/
+UNIV_INLINE
+void
+trx_sys_get_new_trx_id(trx_t* trx)
+{
+	trx->id = trx_sys_get_new_trx_id();
+
+	if (!trx->purge_restore_trx)
+		record_start_tlog(trx->id, trx->trx_start_sec, trx->trx_start_usec);
+}
+
+
 /*****************************************************************//**
 Determines the maximum transaction id.
 @return maximum currently allocated trx id; will be stale after the
@@ -462,3 +478,37 @@ trx_sys_rw_trx_add(trx_t* trx)
 }
 
 #endif /* !UNIV_HOTBACKUP */
+
+/** Writes an end trx id to an index page.
+The id is stored in DATA_END_TRX_ID_LEN (6) bytes with mach_write_to_6().
+Debug builds assert that the id is non-zero.
+@param[out]	ptr	where the end trx id is written
+@param[in]	id	end trx id to write */
+UNIV_INLINE
+void
+trx_write_end_trx_id(
+	byte*		ptr,
+	trx_id_t	id)
+{
+#if DATA_END_TRX_ID_LEN != 6
+# error "DATA_END_TRX_ID_LEN != 6"
+#endif
+	ut_ad(id > 0);
+	mach_write_to_6(ptr, id);
+}
+
+#ifndef UNIV_HOTBACKUP
+/** Reads an end trx id that is stored in DATA_END_TRX_ID_LEN (6) bytes.
+@param[in]	ptr	pointer to the stored end trx id
+@return the end trx id read with mach_read_from_6() */
+UNIV_INLINE
+trx_id_t
+trx_read_end_trx_id(
+	const byte*	ptr)
+{
+#if DATA_END_TRX_ID_LEN != 6
+# error "DATA_END_TRX_ID_LEN != 6"
+#endif
+	return (mach_read_from_6(ptr));
+}
+#endif
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/include/trx0trx.h mysql-8.0.3-rc/storage/innobase/include/trx0trx.h
--- mysql-8.0.3-rc/storage/innobase/include/trx0trx.h	2017-09-19 19:33:50.000000000 +0800
+++ mysql-8.0.3-rc/storage/innobase/include/trx0trx.h	2020-04-19 23:52:38.850543863 +0800
@@ -1116,6 +1116,9 @@ struct trx_t {
 	time_t		start_time;	/*!< time the state last time became
 					TRX_STATE_ACTIVE */
 
+	long		trx_start_sec;	/*!< passed as sec to record_start_tlog() when the trx id is assigned */
+	long		trx_start_usec;	/*!< passed as usec to record_start_tlog() when the trx id is assigned */
+
 	/** Weight/Age of the transaction in the record lock wait queue. */
 	int32_t		age;
 
@@ -1311,6 +1314,11 @@ struct trx_t {
 	{
 		return(skip_gap_locks());
 	}
+
+	/*------------------------------*/
+	bool		purge_restore_trx; /*!< trx allocated by
+					row_purge_his_restore_prepare_thr
+					and used for restoration.
*/
 };
 
 /* Transaction isolation levels (trx->isolation_level) */
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/lock/lock0lock.cc mysql-8.0.3-rc/storage/innobase/lock/lock0lock.cc
--- mysql-8.0.3-rc/storage/innobase/lock/lock0lock.cc	2017-09-19 19:33:50.000000000 +0800
+++ mysql-8.0.3-rc/storage/innobase/lock/lock0lock.cc	2020-05-03 14:46:11.061267951 +0800
@@ -392,6 +392,77 @@ lock_clust_rec_cons_read_sees(
 }
 
 /*********************************************************************//**
+Flashback
+Checks whether a clustered index record version is visible to a
+flashback point read at prebuilt->t_hint->value.as_of_value.
+The version is visible when the transaction that wrote it finished,
+without aborting, no later than that point, and the transaction that
+ended it (end trx id), if any, is still in progress or finished after
+that point. Finish times are compared in whole seconds (tv_sec).
+@return true if sees, or false if an earlier version of the record
+should be retrieved */
+bool
+lock_clust_rec_flashback_point_read_sees(
+/*==========================*/
+	const rec_t*	rec,	/*!< in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/*!< in: clustered index */
+	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
+	ReadView*	view,	/*!< in: flashback read view */
+	row_prebuilt_t*	prebuilt)/*!< in: current prebuilt */
+{
+	ut_ad(index->is_clustered());
+	ut_ad(page_rec_is_user_rec(rec));
+	ut_ad(rec_offs_validate(rec, index, offsets));
+
+	/* A temporary table is private to one connection, so no other
+	transaction can be changing it concurrently and every read of
+	it is consistent. Read-only mode is treated the same way. */
+	if (srv_read_only_mode || index->table->is_temporary()) {
+		ut_ad(view == 0 || index->table->is_temporary());
+		return(true);
+	}
+
+	/* NOTE that we call this function while holding the search
+	system latch. */
+
+	const trx_id_t	start_trx_id
+		= row_get_rec_trx_id(rec, index, offsets);
+	const trx_id_t	end_trx_id
+		= row_get_rec_end_trx_id(rec, index, offsets);
+	const trx_id_t	max_trx_id = trx_sys_get_max_trx_id();
+
+	const tlog_t	start_tlog
+		= read_tlog_by_trx_id(start_trx_id, max_trx_id);
+
+	/* The writer is still in progress or has aborted. */
+	if (start_tlog.status == TLOG_TRX_STATUS_IN_PROGRESS
+	    || start_tlog.status == TLOG_TRX_STATUS_ABORT) {
+		return(false);
+	}
+
+	/* The writer finished after the flashback point. */
+	if (!(prebuilt->t_hint->value.as_of_value
+	      >= start_tlog.finish_time.tv_sec)) {
+		return(false);
+	}
+
+	/* No transaction has ended this version. */
+	if (end_trx_id == END_TRX_ID_MAX_VALUE) {
+		return(true);
+	}
+
+	/* Visible only if the ending transaction had not finished by
+	the flashback point. */
+	const tlog_t	end_tlog
+		= read_tlog_by_trx_id(end_trx_id, max_trx_id);
+
+	return(end_tlog.status == TLOG_TRX_STATUS_IN_PROGRESS
+	       || end_tlog.finish_time.tv_sec
+	       > prebuilt->t_hint->value.as_of_value);
+}
+
+/*********************************************************************//**
 Checks that a non-clustered index record is seen in a consistent read.
 
 NOTE that a non-clustered index page contains so little information on
@@ -4471,7 +4542,10 @@ lock_table(
 	    && !trx->read_only
 	    && trx->rsegs.m_redo.rseg == 0) {
 
-		trx_set_rw_mode(trx);
+		/* Flashback skip this operation */
+		if (!(table->flags2 & DICT_TF2_HIST_TABLE)) {
+			trx_set_rw_mode(trx);
+		}
 	}
 
 	lock_mutex_enter();
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/log/log0ddl.cc mysql-8.0.3-rc/storage/innobase/log/log0ddl.cc
--- mysql-8.0.3-rc/storage/innobase/log/log0ddl.cc	2017-09-19 19:33:50.000000000 +0800
+++ mysql-8.0.3-rc/storage/innobase/log/log0ddl.cc	2020-05-03 14:46:33.961267070 +0800
@@ -383,6 +383,10 @@ DDL_Log_Table::create_tuple(const DDL_Re
 	dfield = dtuple_get_nth_field(m_tuple, dict_col_get_no(col));
 	dfield_set_data(dfield, buf, DATA_ROLL_PTR_LEN);
 
+	col = m_table->get_sys_col(DATA_END_TRX_ID);
+	dfield = dtuple_get_nth_field(m_tuple, dict_col_get_no(col));
+	dfield_set_data(dfield, buf, DATA_END_TRX_ID_LEN);
+
 	buf = static_cast<byte*>(mem_heap_alloc(m_heap, DATA_TRX_ID_LEN));
mach_write_to_6(buf, m_trx->id); col = m_table->get_sys_col(DATA_TRX_ID); @@ -544,7 +548,9 @@ DDL_Log_Table::convert_to_ddl_record( const byte* data; ulint len; - if (i == DATA_ROLL_PTR || i == DATA_TRX_ID) { + if (i == DATA_ROLL_PTR || i == DATA_TRX_ID + || i == DATA_END_TRX_ID) + { continue; } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/log/log0log.cc mysql-8.0.3-rc/storage/innobase/log/log0log.cc --- mysql-8.0.3-rc/storage/innobase/log/log0log.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/log/log0log.cc 2020-04-19 23:52:38.826543864 +0800 @@ -2094,6 +2094,9 @@ log_checkpoint( } #endif /* !_WIN32 */ + /* Flush tlog to disk. */ + flush_tlog(); + log_mutex_enter(); rw_lock_x_unlock(&dict_persist->lock); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/log/log0recv.cc mysql-8.0.3-rc/storage/innobase/log/log0recv.cc --- mysql-8.0.3-rc/storage/innobase/log/log0recv.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/log/log0recv.cc 2020-04-19 23:52:38.826543864 +0800 @@ -2012,6 +2012,7 @@ recv_add_to_hash_table( ut_ad(type != MLOG_FILE_RENAME2); ut_ad(type != MLOG_DUMMY_RECORD); ut_ad(type != MLOG_INDEX_LOAD); + ut_ad(type != MLOG_TLOG_RECORD); recv_sys_t::Space* space; @@ -2824,6 +2825,17 @@ recv_parse_log_rec( } return(new_ptr == nullptr ? 0 : new_ptr - ptr); + + case MLOG_TLOG_RECORD: + case MLOG_TLOG_RECORD | MLOG_SINGLE_REC_FLAG: + + *page_no = FIL_NULL; + *space_id = SPACE_UNKNOWN; + + new_ptr = mlog_parse_and_apply_log_rec_tlog( + ptr, end_ptr, type); + + return(new_ptr == nullptr ? 
0 : new_ptr - ptr); } new_ptr = mlog_parse_initial_log_record( @@ -2939,6 +2951,7 @@ recv_single_rec( case MLOG_FILE_RENAME2: case MLOG_FILE_CREATE2: case MLOG_TABLE_DYNAMIC_META: + case MLOG_TLOG_RECORD: /* These were already handled by recv_parse_log_rec() and @@ -3103,6 +3116,7 @@ recv_multi_rec(byte* ptr, byte* end_ptr) case MLOG_FILE_CREATE2: case MLOG_FILE_RENAME2: case MLOG_TABLE_DYNAMIC_META: + case MLOG_TLOG_RECORD: /* case MLOG_TRUNCATE: Disabled for WL6378 */ /* These were already handled by recv_parse_or_apply_log_rec_body(). */ @@ -4392,6 +4406,9 @@ get_mlog_string(mlog_id_t type) case MLOG_COMP_PAGE_CREATE_SDI: return("MLOG_COMP_PAGE_CREATE_SDI"); + + case MLOG_TLOG_RECORD: + return("MLOG_TLOG_RECORD"); } DBUG_ASSERT(0); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/mtr/mtr0log.cc mysql-8.0.3-rc/storage/innobase/mtr/mtr0log.cc --- mysql-8.0.3-rc/storage/innobase/mtr/mtr0log.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/mtr/mtr0log.cc 2020-05-03 14:47:35.709264695 +0800 @@ -31,6 +31,7 @@ Created 12/7/1995 Heikki Tuuri #include "log0recv.h" #include "my_inttypes.h" #include "page0page.h" +#include "tlog0tlog.h" #ifndef UNIV_HOTBACKUP # include "dict0boot.h" @@ -517,11 +518,15 @@ mlog_open_and_write_index( if (page_is_leaf(page_align(rec))) { mach_write_to_2( - log_ptr, dict_index_get_n_unique_in_tree(index)); + log_ptr, (dict_index_get_n_unique_in_tree(index) - + /* Flashback: For redo log of insert on history table, + do not count system columns on. */ + (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE) ? 3 : 0))); } else { mach_write_to_2( log_ptr, - dict_index_get_n_unique_in_tree_nonleaf(index)); + (dict_index_get_n_unique_in_tree_nonleaf(index) - + (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE) ? 
3 : 0)));
 		}
 
 		log_ptr += 2;
@@ -645,10 +650,14 @@ mlog_parse_index(
 			     == ind->get_col(DATA_TRX_ID - 1 + n_uniq)->len);
 			ut_a(DATA_ROLL_PTR_LEN
 			     == ind->get_col(DATA_ROLL_PTR - 1 + n_uniq)->len);
+			ut_a(DATA_END_TRX_ID_LEN
+			     == ind->get_col(DATA_END_TRX_ID - 1 + n_uniq)->len);
 			ind->fields[DATA_TRX_ID - 1 + n_uniq].col
 				= &table->cols[n + DATA_TRX_ID];
 			ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
 				= &table->cols[n + DATA_ROLL_PTR];
+			ind->fields[DATA_END_TRX_ID - 1 + n_uniq].col
+				= &table->cols[n + DATA_END_TRX_ID];
 		}
 	}
 	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
@@ -656,3 +665,73 @@ mlog_parse_index(
 	*index = ind;
 	return(ptr);
 }
+
+/********************************************************//**
+Parse and apply a log record of type MLOG_TLOG_RECORD.
+The tlog is only updated when the whole record body is in the buffer.
+@return parsed record end, NULL if not a complete record */
+byte*
+mlog_parse_and_apply_tlog_record(
+	const byte*	ptr,	/*!< in: buffer */
+	const byte*	end_ptr)/*!< in: buffer end */
+{
+	/** Get transaction id. */
+	trx_id_t	id = mach_u64_parse_compressed(&ptr, end_ptr);
+
+	/* mach_u64_parse_compressed() sets ptr to NULL when the buffer
+	ends before the value is complete. The one-byte status that
+	follows must lie inside the buffer as well. */
+	if (ptr == NULL || end_ptr < ptr + 1) {
+
+		return(NULL);
+	}
+
+	/** Get transaction status. */
+	tlog_trx_status	status =
+		static_cast<tlog_trx_status>(mach_read_from_1(ptr));
+	++ptr;
+
+	/** Get time when transaction status changes. */
+	edge_t	usec = mach_u64_parse_compressed(&ptr, end_ptr);
+
+	if (ptr == NULL) {
+		/* Incomplete record: do not apply a partial entry. */
+		return(NULL);
+	}
+
+	/** Record transaction in tlog. */
+	record_tlog_low(id, status, usec);
+
+	return(const_cast<byte*>(ptr));
+}
+
+/********************************************************//**
+Parse and apply a log record of type MLOG_TLOG_RECORD.
+@return parsed record end, NULL if not a complete record */ +byte* +mlog_parse_and_apply_log_rec_tlog( + const byte* ptr, /*!< in: buffer */ + const byte* end_ptr, /*!< in: buffer end */ + mlog_id_t* type) /*!< out: log record type */ +{ + if (end_ptr < ptr + 1) { + + return(NULL); + } + + *type = (mlog_id_t)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG); + ut_ad(*type <= MLOG_BIGGEST_TYPE); + + ++ptr; + + switch (*type) { + case MLOG_TLOG_RECORD: + ptr = mlog_parse_and_apply_tlog_record(ptr, end_ptr); + break; + + default: + ut_ad(0); + } + + return(const_cast(ptr)); +} diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/read/read0read.cc mysql-8.0.3-rc/storage/innobase/read/read0read.cc --- mysql-8.0.3-rc/storage/innobase/read/read0read.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/read/read0read.cc 2020-04-19 23:52:38.838543864 +0800 @@ -28,6 +28,10 @@ Created 2/16/1997 Heikki Tuuri #include "srv0srv.h" #include "trx0sys.h" +/* Flashback */ +#include +#include "tlog0tlog.h" + /* ------------------------------------------------------------------------------- FACT A: Cursor read view on a secondary index sees only committed versions diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/rem/rem0cmp.cc mysql-8.0.3-rc/storage/innobase/rem/rem0cmp.cc --- mysql-8.0.3-rc/storage/innobase/rem/rem0cmp.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/rem/rem0cmp.cc 2020-04-19 23:52:38.830543864 +0800 @@ -768,6 +768,22 @@ cmp_dtuple_rec_with_match_low( ut_ad(!dfield_is_ext(dtuple_field)); + /* Flashback + Avoid restoring the same version multiple times */ + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE) && + type->mtype == DATA_SYS && + type->prtype == (DATA_NOT_NULL | DATA_ROW_ID)) { + /* Do not compare DATA_ROW_ID for restoring previous + version. Because for any row being restored, a new + row id is assigned, if necessary. 
If the same version + is restored multiple times, each time the row being + restored gets a distinct row id, while these rows + are actually the same version and should not be + restored multiple times. */ + + continue; + } + /* For now, change buffering is only supported on indexes with ascending order on the columns. */ ret = cmp_data(type->mtype, type->prtype, @@ -939,6 +955,22 @@ cmp_dtuple_rec_with_match_bytes( goto order_resolved; } + /* Flashback + Avoid restoring the same version multiple times */ + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE) && + type->mtype == DATA_SYS && + type->prtype == (DATA_NOT_NULL | DATA_ROW_ID)) { + /* Do not compare DATA_ROW_ID for restoring previous + version. Because for any row being restored, a new + row id is assigned, if necessary. If the same version + is restored multiple times, each time the row being + restored gets a distinct row id, while these rows + are actually the same version and should not be + restored multiple times. */ + + goto next_field; + } + /* Set the pointers at the current byte */ rec_b_ptr += cur_bytes; @@ -1282,6 +1314,18 @@ cmp_rec_rec_with_match( goto order_resolved; } + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE) && + mtype == DATA_SYS && + prtype == (DATA_NOT_NULL | DATA_ROW_ID)) { + /* For history table with generated clustered index, + records in page are ordered by + , + instead of DATA_ROW_ID. + See cmp_dtuple_rec_with_match_low. 
*/ + + continue; + } + ret = cmp_data(mtype, prtype, is_asc, rec1_b_ptr, rec1_f_len, rec2_b_ptr, rec2_f_len); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0ftsort.cc mysql-8.0.3-rc/storage/innobase/row/row0ftsort.cc --- mysql-8.0.3-rc/storage/innobase/row/row0ftsort.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0ftsort.cc 2020-04-19 23:52:38.850543863 +0800 @@ -1112,19 +1112,19 @@ row_merge_write_fts_node( fts_write_doc_id((byte*)&write_first_doc_id, node->first_doc_id); dfield_set_data(field, &write_first_doc_id, sizeof(doc_id_t)); - /* The third and fourth fileds(TRX_ID, ROLL_PTR) are filled already.*/ - /* The fifth field is last_doc_id */ - field = dtuple_get_nth_field(tuple, 4); + /* The third and fourth and fifth fileds(TRX_ID, ROLL_PTR, END_TRX_ID) are filled already.*/ + /* The sixth field is last_doc_id */ + field = dtuple_get_nth_field(tuple, 5); fts_write_doc_id((byte*)&write_last_doc_id, node->last_doc_id); dfield_set_data(field, &write_last_doc_id, sizeof(doc_id_t)); - /* The sixth field is doc_count */ - field = dtuple_get_nth_field(tuple, 5); + /* The seventh field is doc_count */ + field = dtuple_get_nth_field(tuple, 6); mach_write_to_4((byte*)&write_doc_count, (ib_uint32_t)node->doc_count); dfield_set_data(field, &write_doc_count, sizeof(ib_uint32_t)); - /* The seventh field is ilist */ - field = dtuple_get_nth_field(tuple, 6); + /* The eighth field is ilist */ + field = dtuple_get_nth_field(tuple, 7); dfield_set_data(field, node->ilist, node->ilist_size); ret = ins_ctx->btr_bulk->insert(tuple); @@ -1519,6 +1519,7 @@ row_fts_merge_insert( dict_index_t* aux_index; trx_t* trx; byte trx_id_buf[6]; + byte end_trx_id_buf[6]; roll_ptr_t roll_ptr = 0; dfield_t* field; @@ -1619,7 +1620,7 @@ row_fts_merge_insert( dict_index_copy_types(ins_ctx.tuple, aux_index, dict_index_get_n_fields(aux_index)); - /* Set TRX_ID and ROLL_PTR */ + /* Set TRX_ID and ROLL_PTR and 
END_TRX_ID */ trx_write_trx_id(trx_id_buf, trx->id); field = dtuple_get_nth_field(ins_ctx.tuple, 2); dfield_set_data(field, &trx_id_buf, 6); @@ -1627,6 +1628,10 @@ row_fts_merge_insert( field = dtuple_get_nth_field(ins_ctx.tuple, 3); dfield_set_data(field, &roll_ptr, 7); + trx_write_trx_id(end_trx_id_buf, trx->id); //end trx id is meaningless for fts + field = dtuple_get_nth_field(ins_ctx.tuple, 4); + dfield_set_data(field, &end_trx_id_buf, 6); + #ifdef UNIV_DEBUG ins_ctx.aux_index_id = id; #endif diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0ins.cc mysql-8.0.3-rc/storage/innobase/row/row0ins.cc --- mysql-8.0.3-rc/storage/innobase/row/row0ins.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0ins.cc 2020-04-25 23:30:52.389786273 +0800 @@ -160,6 +160,7 @@ row_ins_alloc_sys_fields( uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN; if (!table->is_intrinsic()) { len += DATA_ROLL_PTR_LEN; + len += DATA_END_TRX_ID_LEN; } ptr = static_cast(mem_heap_zalloc(heap, len)); @@ -186,11 +187,23 @@ row_ins_alloc_sys_fields( ptr += DATA_TRX_ID_LEN; if (!table->is_intrinsic()) { + /* 3. Populate roll ptr */ col = table->get_sys_col(DATA_ROLL_PTR); dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN); + + ptr += DATA_ROLL_PTR_LEN; + + /* 4. Populate end trx id */ + col = table->get_sys_col(DATA_END_TRX_ID); + + dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); + + dfield_set_data(dfield, ptr, DATA_END_TRX_ID_LEN); + + node->end_trx_id_buf = ptr; } } @@ -2691,7 +2704,8 @@ row_ins_clust_index_entry_low( before any further resource acquisitions to prevent deadlock. 
No need to log for temporary tables and intermediate tables */ if (!index->table->is_temporary() && !index->table->skip_alter_undo - && dict_table_has_autoinc_col(index->table)) { + && dict_table_has_autoinc_col(index->table) + && !DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) { ib_uint64_t counter = row_get_autoinc_counter( entry, index->table->autoinc_field_no); @@ -2711,9 +2725,10 @@ row_ins_clust_index_entry_low( || (index->allow_duplicates && index->table->is_intrinsic())); - if (!index->allow_duplicates + if ((!index->allow_duplicates && n_uniq - && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) { + && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) + || DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) { if (flags == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG @@ -3233,8 +3248,9 @@ row_ins_sec_index_entry_low( n_unique = dict_index_get_n_unique(index); - if (dict_index_is_unique(index) - && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) { + if ((dict_index_is_unique(index) + && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) + || DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) { mtr_commit(&mtr); DEBUG_SYNC_C("row_ins_sec_index_unique"); @@ -4070,6 +4086,12 @@ row_ins_step( memset(node->trx_id_buf, 0, DATA_TRX_ID_LEN); trx_write_trx_id(node->trx_id_buf, trx->id); + if (!node->table->is_intrinsic()) { + /* Set end_trx_id for insert operation */ + memset(node->end_trx_id_buf, 0, DATA_END_TRX_ID_LEN); + trx_write_end_trx_id(node->end_trx_id_buf, END_TRX_ID_MAX_VALUE); + } + if (node->state == INS_NODE_SET_IX_LOCK) { node->state = INS_NODE_ALLOC_ROW_ID; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0mysql.cc mysql-8.0.3-rc/storage/innobase/row/row0mysql.cc --- mysql-8.0.3-rc/storage/innobase/row/row0mysql.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0mysql.cc 2020-04-19 23:52:38.850543863 
+0800
@@ -983,6 +983,9 @@ row_create_prebuilt(
 	prebuilt->m_no_prefetch = false;
 	prebuilt->m_read_virtual_key = false;
 
+	/* Flashback: no transitional version has been fetched yet. */
+	prebuilt->n_transitional_vers_fetched = 0;
+
 	DBUG_RETURN(prebuilt);
 }
 
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0purge.cc mysql-8.0.3-rc/storage/innobase/row/row0purge.cc
--- mysql-8.0.3-rc/storage/innobase/row/row0purge.cc	2017-09-19 19:33:50.000000000 +0800
+++ mysql-8.0.3-rc/storage/innobase/row/row0purge.cc	2020-05-03 16:26:52.810557607 +0800
@@ -40,6 +40,7 @@ Created 3/14/1997 Heikki Tuuri
 #include "row0mysql.h"
 #include "row0row.h"
 #include "row0upd.h"
+#include "row0ins.h"
 #include "row0vers.h"
 #include "srv0mon.h"
 #include "srv0start.h"
@@ -129,6 +130,445 @@ row_purge_reposition_pcur(
 }
 
 /***********************************************************//**
+Flashback
+Allocate a background trx and create the fork and thr that are used
+to insert restored versions into the history table.
+*/
+static
+void
+row_purge_his_restore_prepare_thr(
+	que_thr_t*	&thr,	/*!< out: query thread used for insertion */
+	mem_heap_t*	&heap)	/*!< in: heap the fork and thr are created from */
+{
+	DBUG_ENTER("row_purge_his_restore_prepare_thr");
+
+	que_fork_t*	fork = NULL;
+	trx_t*		trx = NULL;
+
+	trx = trx_allocate_for_background();
+	ut_ad(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE);
+	trx->in_innodb &= TRX_FORCE_ROLLBACK_MASK;
+	trx->state = TRX_STATE_ACTIVE;
+	trx->op_info = "restoring previous versions to history table";
+	trx->graph = NULL;
+
+	fork = que_fork_create(NULL, NULL, QUE_FORK_INSERT, heap);
+	fork->trx = trx;
+
+	thr = que_thr_create(fork, heap, NULL);
+	thr->child = NULL;
+
+	DBUG_VOID_RETURN;
+}
+
+/***********************************************************//**
+Flashback
+Open the history table of orig_tbl and request an IX lock on it.
+The history table name is the original table name followed by
+HISTORY_TABLE_POSTFIX.
+@return DB_SUCCESS if the table is open and locked,
+DB_TABLE_NOT_FOUND if it cannot be opened (his_tbl is then NULL),
+otherwise the error returned by lock_table()
+*/
+static
+dberr_t
+row_purge_his_restore_open_table(
+	const dict_table_t* const orig_tbl, /*!< in: original table */
+	dict_table_t*	&his_tbl,	/*!< in,out: history table */
+	MDL_ticket*	&mdl,		/*!< in,out: history table metadata lock */
+	que_thr_t*	thr,		/*!< in: query thread that requests the lock */
+	THD*		thd)		/*!< in: THD */
+{
+	const std::string	his_tbl_name =
+		std::string(orig_tbl->name.m_name) + HISTORY_TABLE_POSTFIX;
+
+	mutex_enter(&dict_sys->mutex);
+	his_tbl = dd_table_open_on_name(thd, &mdl, his_tbl_name.c_str(), true,
+		DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT);
+
+	mutex_exit(&dict_sys->mutex);
+
+	if (his_tbl == NULL) {
+		/* lock_table() dereferences the table, so a history
+		table that could not be opened is reported here. */
+		return(DB_TABLE_NOT_FOUND);
+	}
+
+	return(lock_table(0, his_tbl, LOCK_IX, thr));
+}
+
+/***********************************************************//**
+Flashback
+Close the history table opened by row_purge_his_restore_open_table().
+Does nothing if his_tbl is NULL, which is the case when the open failed.
+*/
+static
+void
+row_purge_his_restore_close_table(
+	dict_table_t*	&his_tbl,	/*!< in,out: history table, set to NULL */
+	MDL_ticket*	&mdl,		/*!< in,out: history table metadata lock */
+	que_thr_t*	thr,		/*!< in: not used */
+	THD*		thd)		/*!< in: THD */
+{
+	if (his_tbl == NULL) {
+		return;
+	}
+
+	dd_table_close(his_tbl, thd, &mdl, false);
+	his_tbl = NULL;
+}
+
+/***********************************************************//**
+Flashback
+Restore previous version of the record.
+@return 1 if err is DB_SUCCESS after the insert loop, 0 otherwise (including when the history table cannot be opened or locked)
+*/
+static
+int
+row_purge_his_restore_single_ver(
+/*=============================*/
+	rec_t*		rec,	/*!< in: record being purged */
+	ulint*		offsets, /*!< in: offsets of rec; also made valid for the copy */
+	purge_node_t*	node,	/*!< in: purge node */
+	que_thr_t*	thr,	/*!< in: thread (not used by this function) */
+	mtr_t*		mtr)	/*!< in: mini transaction (not used by this function) */
+{
+	dberr_t		err = DB_SUCCESS;
+
+	mem_heap_t*	heap = NULL;
+	rec_t*		prev_vers = NULL; /* previous version */
+	dict_index_t*	clust_index = NULL;
+				/* clustered index of original table */
+
+	MDL_ticket*	mdl = NULL;
+	row_ext_t*	ext = NULL;
+	dict_table_t*	his_tbl = NULL;
+	dict_index_t*	his_index = NULL;
+	dtuple_t*	restore_tuple = NULL; /* tuple to be restored */
+
+	THD*		thd = current_thd;
+
+	que_thr_t*	restore_thr = NULL; /* used for insertion */
+	trx_t*		restore_trx = NULL; /* used for insertion */
+	trx_id_t	trx_id_reserved = 0; /* id assigned to restore_trx, put back before commit */
+
+	heap = mem_heap_create(1024);
+
+	/* Prepare thr used for insertion. */
+	row_purge_his_restore_prepare_thr(restore_thr, heap);
+	/* restore_trx is newly allocated. */
+	restore_trx = thr_get_trx(restore_thr);
+
+	clust_index = node->table->first_index();
+
+	/* trx_undo_prev_version_build is not applicable, as
+	trx_undo_prev_version_build(rec) builds the version prior to rec,
+	and rec is the newest version. So, trx_undo_prev_version_build(rec)
+	always builds the version prior to the newest.
+	Here we need the version indicated by the undo record, and build it
+	by applying the upd_t made from the undo record to a copy of rec. */
+	byte *buf = static_cast (mem_heap_alloc(heap, rec_offs_size(offsets)));
+	prev_vers = rec_copy(buf, rec, offsets);
+	rec_offs_make_valid(prev_vers, clust_index, offsets);
+	row_upd_rec_in_place(prev_vers, clust_index, offsets, node->update, NULL);
+
+	/* Open and lock history table. NOTE(review): the failure path below returns without trx_free_for_background(restore_trx) - confirm the trx is not leaked. */
+	err = row_purge_his_restore_open_table(node->table, his_tbl, mdl, restore_thr, thd);
+	if (err != DB_SUCCESS) {
+		row_purge_his_restore_close_table(his_tbl, mdl, restore_thr, thd);
+
+		mem_heap_free(heap);
+
+		return 0;
+	}
+
+	his_index = his_tbl->first_index();
+
+	/* Start transaction, remember the id it was given, and
+	set transaction id as DB_TRX_ID of prev_vers. */
+	restore_trx->purge_restore_trx = true;
+	trx_start_if_not_started(restore_trx, true);
+	trx_id_reserved = restore_trx->id;
+	restore_trx->id = row_get_rec_trx_id(prev_vers,
+		clust_index, offsets);
+
+	/* Create tuple to be restored, and assign it basic attributes. */
+	restore_tuple = dtuple_create_with_vcol(heap, his_tbl->get_n_cols(),
+		dict_table_get_n_v_cols(his_tbl));
+	dict_table_copy_types(restore_tuple, his_tbl);
+	dtuple_set_info_bits(restore_tuple,
+		rec_get_info_bits(prev_vers, rec_offs_comp(offsets)));
+
+	/* Convert prev_vers to restore_tuple. Field i of his_index is read from field i of prev_vers; assumes both clustered indexes have the same field order - TODO confirm. */
+	for (ulint i = 0; i < dict_index_get_n_fields(his_index); ++i) {
+		dict_field_t*		his_ind_field = his_index->get_field(i);
+		const dict_col_t*	col = his_ind_field->col;
+		ulint			len;
+		ulint			col_no = dict_col_get_no(col);
+		dfield_t*		dfield = dtuple_get_nth_field(restore_tuple, col_no);
+		const byte*		field = rec_get_nth_field(prev_vers, offsets, i, &len);
+
+		if (i == 0 && !dict_index_is_unique(his_index)) {
+			/* Allocate row id for history record if necessary. The 6-byte buffer is set with the length of field 0 of prev_vers; presumably that is also 6 - verify. */
+			byte*	ptr = static_cast (mem_heap_alloc(heap, 6));
+			row_id_t	row_id = dict_sys_get_new_row_id();
+			dict_sys_write_row_id(ptr, row_id);
+
+			dfield_set_data(dfield, ptr, len);
+		} else {
+			dfield_set_data(dfield, field, len);
+		}
+	}
+
+	/* Insert restore_tuple into every non-FTS index of the history table. */
+	while (his_index) {
+		if (his_index->type != DICT_FTS) {
+			dtuple_t*	his_entry = row_build_index_entry(restore_tuple,
+				ext, his_index, heap);
+			ut_ad(dtuple_check_typed(his_entry));
+
+			if (his_index->is_clustered()) {
+				err = row_ins_clust_index_entry(his_index,
+					his_entry, restore_thr, 0, false);
+			} else {
+				err = row_ins_sec_index_entry(his_index,
+					his_entry, restore_thr, false);
+			}
+		}
+		/* Continue restoring if DB_DUPLICATE_KEY is found.
+		In case that some indexes are restored successfully,
+		while others are not as corruption happens.
+		The indexes not restored can be restored in recovery. */
+		if (err != DB_SUCCESS && err != DB_DUPLICATE_KEY) {
+			break;
+		}
+		his_index = his_index->next();
+		/* Skip corrupted secondary indexes. */
+		dict_table_skip_corrupt_index(his_index);
+	}
+
+	/* Put back the reserved id, then commit and release restore_trx. NOTE(review): the trx is committed even when an insert failed - confirm. */
+	restore_trx->id = trx_id_reserved;
+	trx_commit(restore_trx);
+	trx_free_for_background(restore_trx);
+
+	/* Close history table. */
+	row_purge_his_restore_close_table(his_tbl, mdl, restore_thr, thd);
+
+	/* Clean up. */
+	mem_heap_free(heap);
+
+	return err == DB_SUCCESS;
+}
+
+/***********************************************************//**
+Flashback
+Restore multiple versions of the record.
+This function is for undo record of type TRX_UNDO_DEL_MARK_REC.
+Purging an undo record of type TRX_UNDO_DEL_MARK_REC results in
+removing data in original table, then purging undo record of
+type TRX_UNDO_UPD_EXIST_REC will not find the exact row in
+original table, and some versions can not get restored.
+So, all previous versions are restored when purging an undo record
+of type TRX_UNDO_DEL_MARK_REC.
+@return 1 if err is DB_SUCCESS when the loop over versions ends, 0 otherwise (including when the history table cannot be opened or locked)
+*/
+static
+int
+row_purge_his_restore_multi_vers(
+/*=============================*/
+	rec_t*		rec,	/*!< in: record being purged */
+	ulint*		offsets, /*!< in: offsets of rec */
+	purge_node_t*	node,	/*!< in: purge node */
+	que_thr_t*	thr,	/*!< in: thread (not used by this function) */
+	mtr_t*		mtr)	/*!< in: mini transaction, passed to trx_undo_prev_version_build() */
+{
+	dberr_t		err = DB_SUCCESS;
+
+	/* Used for getting previous versions of original table. */
+	mem_heap_t*	heap = NULL;
+	mem_heap_t*	heap2 = NULL;
+	const dtuple_t*	vrow = NULL;
+	rec_t*		prev_vers = NULL; /* previous version */
+	dict_index_t*	clust_index = NULL;
+				/* clustered index of original table */
+	rec_t*		version = NULL;
+				/* used to traverse versions along with prev_vers */
+
+	/* Used for putting previous versions to history table. */
+	MDL_ticket*	mdl = NULL;
+	row_ext_t*	ext = NULL;
+	dict_table_t*	his_tbl = NULL;
+	dict_index_t*	his_index = NULL;
+	rec_t*		restore_rec = NULL; /* rec to be restored */
+	dtuple_t*	restore_tuple = NULL; /* tuple to be restored */
+	mem_heap_t*	restore_heap = NULL; /* heap used for restoring */
+	ulint*		restore_offsets = NULL; /* offsets of restore_rec */
+
+	THD*		thd = current_thd;
+
+	mem_heap_t*	thr_heap = NULL; /* heap the fork and thr are created from */
+	que_thr_t*	restore_thr = NULL; /* used for insertion */
+	trx_t*		restore_trx = NULL; /* used for insertion */
+	trx_id_t	trx_id_reserved = 0; /* id assigned to restore_trx, put back before commit */
+
+	thr_heap = mem_heap_create(1024);
+	/* Prepare thr used for insertion. */
+	row_purge_his_restore_prepare_thr(restore_thr, thr_heap);
+	/* restore_trx is newly allocated. */
+	restore_trx = thr_get_trx(restore_thr);
+
+	/* Open and lock history table. NOTE(review): the failure path below returns without trx_free_for_background(restore_trx) - confirm the trx is not leaked. */
+	err = row_purge_his_restore_open_table(node->table, his_tbl, mdl, restore_thr, thd);
+	if (err != DB_SUCCESS) {
+		row_purge_his_restore_close_table(his_tbl, mdl, restore_thr, thd);
+
+		mem_heap_free(thr_heap);
+
+		return 0;
+	}
+
+	heap = mem_heap_create(1024);
+
+	his_index = his_tbl->first_index();
+	clust_index = node->table->first_index();
+
+	/* Start transaction and reserve transaction id. */
+	restore_trx->purge_restore_trx = true;
+	trx_start_if_not_started(restore_trx, true);
+	trx_id_reserved = restore_trx->id;
+
+	/* Set version as the newest. */
+	version = rec;
+	/* Traverse and restore each version; heap2 keeps the previous iteration's heap alive until the next version is built. */
+	for (; ;) {
+		heap2 = heap;
+		heap = mem_heap_create(1024);
+		vrow = NULL;
+
+		trx_undo_prev_version_build(rec, mtr, version,
+			clust_index, offsets,
+			heap, &prev_vers, NULL,
+			dict_index_has_virtual(clust_index)
+			? &vrow : NULL, 1);
+
+		mem_heap_free(heap2);
+
+		if (!prev_vers) {
+			break;
+		}
+
+		/* Offsets of previous version. */
+		offsets = rec_get_offsets(prev_vers, clust_index,
+			NULL, ULINT_UNDEFINED, &heap);
+
+		/* Set transaction id as DB_TRX_ID of prev_vers. */
+		restore_trx->id = row_get_rec_trx_id(prev_vers,
+			clust_index, offsets);
+
+		/* Copy prev_vers to restore_rec.
+		prev_vers is used for the traversal.
+		restore_rec is used for restoring. */
+		restore_heap = mem_heap_create(1024);
+
+		byte* buf = static_cast(mem_heap_alloc(
+			restore_heap, rec_offs_size(offsets)));
+		restore_rec= rec_copy(buf, prev_vers, offsets);
+		/* Offsets of restore record. */
+		restore_offsets = rec_get_offsets(restore_rec, clust_index,
+			NULL, ULINT_UNDEFINED, &restore_heap);
+		rec_offs_make_valid(restore_rec, clust_index, restore_offsets);
+
+		/* Create tuple to be restored, and assign it basic attributes. */
+		restore_tuple = dtuple_create_with_vcol(restore_heap, his_tbl->get_n_cols(),
+			dict_table_get_n_v_cols(his_tbl));
+		dict_table_copy_types(restore_tuple, his_tbl);
+		dtuple_set_info_bits(restore_tuple,
+			rec_get_info_bits(restore_rec, rec_offs_comp(restore_offsets)));
+
+		/* Convert restore_rec to restore_tuple. Field i of his_index is read from field i of restore_rec; assumes both clustered indexes have the same field order - TODO confirm. */
+		for (ulint i = 0; i < dict_index_get_n_fields(his_index); ++i) {
+			dict_field_t*		his_ind_field = his_index->get_field(i);
+			const dict_col_t*	col = his_ind_field->col;
+			ulint			len;
+			ulint			col_no = dict_col_get_no(col);
+			dfield_t*		dfield = dtuple_get_nth_field(restore_tuple, col_no);
+			const byte*		field = rec_get_nth_field(restore_rec, restore_offsets, i, &len);
+
+			/* Allocate row id for history record if necessary. The 6-byte buffer is set with the length of field 0 of restore_rec; presumably that is also 6 - verify. */
+			if (i == 0 && !dict_index_is_unique(his_index)) {
+				byte*	ptr = static_cast (mem_heap_alloc(restore_heap, 6));
+				row_id_t	row_id = dict_sys_get_new_row_id();
+				dict_sys_write_row_id(ptr, row_id);
+
+				dfield_set_data(dfield, ptr, len);
+			} else {
+				dfield_set_data(dfield, field, len);
+			}
+		}
+
+		/* Insert restore_tuple into every non-FTS index of the history table. */
+		while (his_index) {
+			/* Create heap for inserting. */
+			mem_heap_t*	ins_heap = mem_heap_create(1024);
+
+			if (his_index->type != DICT_FTS) {
+				dtuple_t*	his_entry = row_build_index_entry(restore_tuple,
+					ext, his_index, ins_heap);
+				ut_ad(dtuple_check_typed(his_entry));
+
+				if (his_index->is_clustered()) {
+					err = row_ins_clust_index_entry(his_index,
+						his_entry, restore_thr, 0, false);
+				} else {
+					err = row_ins_sec_index_entry(his_index,
+						his_entry, restore_thr, false);
+				}
+			}
+
+			mem_heap_free(ins_heap);
+
+			/* Continue restoring if DB_DUPLICATE_KEY is found.
+			In case that some indexes are restored successfully,
+			while others are not as corruption happens.
+			The indexes not restored can be restored in recovery. */
+			if (err != DB_SUCCESS && err != DB_DUPLICATE_KEY) {
+				break;
+			}
+
+			his_index = his_index->next();
+
+			/* Skip corrupted secondary indexes. */
+			dict_table_skip_corrupt_index(his_index);
+		}
+
+
+		mem_heap_free(restore_heap);
+
+		/* Backtrack version. */
+		version = prev_vers;
+
+		/* Reset to clustered index. */
+		his_index = his_tbl->first_index();
+	}
+
+	/* Put back the reserved id, then commit and release restore_trx. NOTE(review): the trx is committed even when an insert failed - confirm. */
+	restore_trx->id = trx_id_reserved;
+	trx_commit(restore_trx);
+	trx_free_for_background(restore_trx);
+
+	/* Close history table. */
+	row_purge_his_restore_close_table(his_tbl, mdl, restore_thr, thd);
+
+	/* Clean up. */
+	mem_heap_free(heap);
+	mem_heap_free(thr_heap);
+
+	return err == DB_SUCCESS;
+}
+
+
+/***********************************************************//**
 Removes a delete marked clustered index record if possible.
 @retval true if the row was not found, or it was successfully removed
 @retval false if the row was modified after the delete marking */
@@ -137,7 +559,8 @@ bool
 row_purge_remove_clust_if_poss_low(
 /*===============================*/
 	purge_node_t*	node,	/*!< in/out: row purge node */
-	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+	que_thr_t*	thr)	/*!< in: passed to row_purge_his_restore_multi_vers() */
 {
 	dict_index_t*		index;
 	bool			success		= true;
@@ -178,6 +601,11 @@ row_purge_remove_clust_if_poss_low(
 		goto func_exit;
 	}
 
+	/* Restore the old versions into the history table if necessary; the result of the restore is not checked. */
+	if (node->table->flags2 & DICT_TF2_ORIG_TABLE) {
+		row_purge_his_restore_multi_vers(rec, offsets, node, thr, &mtr);
+	}
+
 	ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
 
 	if (mode == BTR_MODIFY_LEAF) {
@@ -226,9 +654,10 @@ static MY_ATTRIBUTE((warn_unused_result)
 bool
 row_purge_remove_clust_if_poss(
 /*===========================*/
-	purge_node_t*	node)	/*!< in/out: row purge node */
+	purge_node_t*	node,	/*!< in/out: row purge node */
+	que_thr_t*	thr)	/*!< in: forwarded to row_purge_remove_clust_if_poss_low() */
 {
-	if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
+	if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF, thr)) {
 		return(true);
 	}
 
@@ -236,7 +665,7 @@ row_purge_remove_clust_if_poss(
 	     n_tries <
BTR_CUR_RETRY_DELETE_N_TIMES; n_tries++) { if (row_purge_remove_clust_if_poss_low( - node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) { + node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, thr)) { return(true); } @@ -649,7 +1078,8 @@ static MY_ATTRIBUTE((warn_unused_result) bool row_purge_del_mark( /*===============*/ - purge_node_t* node) /*!< in/out: row purge node */ + purge_node_t* node, /*!< in/out: row purge node */ + que_thr_t* thr) { mem_heap_t* heap; @@ -678,7 +1108,7 @@ row_purge_del_mark( mem_heap_free(heap); - return(row_purge_remove_clust_if_poss(node)); + return(row_purge_remove_clust_if_poss(node, thr)); } /***********************************************************//** @@ -688,9 +1118,7 @@ static void row_purge_upd_exist_or_extern_func( /*===============================*/ -#ifdef UNIV_DEBUG - const que_thr_t*thr, /*!< in: query thread */ -#endif /* UNIV_DEBUG */ + que_thr_t* thr, /*!< in: query thread */ purge_node_t* node, /*!< in: row purge node */ trx_undo_rec_t* undo_rec) /*!< in: record to purge */ { @@ -731,6 +1159,51 @@ row_purge_upd_exist_or_extern_func( mem_heap_free(heap); skip_secondaries: + /* Flashback */ + if (node->table->flags2 & DICT_TF2_ORIG_TABLE) { + dict_index_t* index; + rec_t* rec; + mem_heap_t* heap = NULL; + ulint* offsets; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + mtr_t mtr; + rec_offs_init(offsets_); + + index = node->table->first_index(); + fil_space_t* space = fil_space_acquire_silent(index->space); + if (space == NULL) { + ut_ad(dict_table_is_sdi(node->table->id)); + } else { + fil_space_release(space); + } + log_free_check(); + mtr_start(&mtr); + mtr_sx_lock(dict_index_get_lock(index), &mtr); + + if (!row_purge_reposition_pcur(BTR_MODIFY_LEAF, node, &mtr)) { + if (!row_purge_reposition_pcur(BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, node, &mtr)) { + goto skip_his_restore; + } + } + + rec = btr_pcur_get_rec(&node->pcur); + offsets = rec_get_offsets( + rec, index, offsets_, ULINT_UNDEFINED, &heap); + + 
row_purge_his_restore_single_ver(rec, offsets, node, thr, &mtr); + +skip_his_restore: + if (heap) { + mem_heap_free(heap); + } + + if (node->found_clust) { + btr_pcur_commit_specify_mtr(&node->pcur, &mtr); + } else { + mtr_commit(&mtr); + } + } + /* Free possible externally stored fields */ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) { @@ -828,7 +1301,7 @@ skip_secondaries: row_purge_upd_exist_or_extern_func(thr,node,undo_rec) #else /* UNIV_DEBUG */ # define row_purge_upd_exist_or_extern(thr,node,undo_rec) \ - row_purge_upd_exist_or_extern_func(node,undo_rec) + row_purge_upd_exist_or_extern_func(thr,node,undo_rec) #endif /* UNIV_DEBUG */ /***********************************************************//** @@ -855,6 +1328,7 @@ row_purge_parse_undo_rec( table_id_t table_id; trx_id_t trx_id; roll_ptr_t roll_ptr; + trx_id_t end_trx_id; ulint info_bits; ulint type; @@ -873,7 +1347,7 @@ row_purge_parse_undo_rec( } ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); + &end_trx_id, &info_bits); node->table = NULL; node->trx_id = trx_id; @@ -1075,7 +1549,8 @@ err_exit: if (type == TRX_UNDO_UPD_EXIST_REC && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) - && !*updated_extern) { + && !*updated_extern + && !(node->table->flags2 & DICT_TF2_ORIG_TABLE)) { /* Purge requires no changes to indexes: we may return */ goto close_exit; @@ -1087,7 +1562,7 @@ err_exit: trx = thr_get_trx(thr); ptr = trx_undo_update_rec_get_update( - ptr, clust_index, type, trx_id, roll_ptr, info_bits, trx, + ptr, clust_index, type, trx_id, roll_ptr, end_trx_id, info_bits, trx, node->heap, &(node->update)); /* Read to the partial row the fields that occur in indexes */ @@ -1115,9 +1590,7 @@ bool row_purge_record_func( purge_node_t* node, trx_undo_rec_t* undo_rec, -#ifdef UNIV_DEBUG - const que_thr_t*thr, -#endif /* UNIV_DEBUG */ + que_thr_t* thr, bool updated_extern, THD* thd) { @@ -1134,7 +1607,7 @@ row_purge_record_func( switch (node->rec_type) { case 
TRX_UNDO_DEL_MARK_REC: - purged = row_purge_del_mark(node); + purged = row_purge_del_mark(node, thr); if (!purged) { break; } @@ -1183,7 +1656,7 @@ row_purge_record_func( row_purge_record_func(node,undo_rec,thr,updated_extern,thd) #else /* UNIV_DEBUG */ # define row_purge_record(node,undo_rec,thr,updated_extern,thd) \ - row_purge_record_func(node,undo_rec,updated_extern,thd) + row_purge_record_func(node,undo_rec,thr,updated_extern,thd) #endif /* UNIV_DEBUG */ /***********************************************************//** diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0sel.cc mysql-8.0.3-rc/storage/innobase/row/row0sel.cc --- mysql-8.0.3-rc/storage/innobase/row/row0sel.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0sel.cc 2020-05-03 16:28:44.846553299 +0800 @@ -66,6 +66,7 @@ Created 12/19/1997 Heikki Tuuri #include "ut0new.h" #include "lob0lob.h" #include "dict0dd.h" +#include "trx0rec.h" /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -3392,6 +3393,131 @@ row_sel_build_prev_vers_for_mysql( } /*********************************************************************//** +Builds a previous version of a clustered index record for a flashback read. 
+Visibility is checked against a timestamp (the AS OF value) by the callee.
+@return DB_SUCCESS or DB_MISSING_HISTORY, as returned by row_vers_build_for_flashback_point_read() */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_sel_build_prev_vers_for_flashback_point(
+/*==============================*/
+	ReadView*	read_view,	/*!< in: read view */
+	dict_index_t*	clust_index,	/*!< in: clustered index */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: record in a clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
+					rec_get_offsets(rec, clust_index) */
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
+					the offsets are allocated */
+	rec_t**		old_vers,	/*!< out: old version, or NULL if the
+					record does not exist in the view:
+					i.e., it was freshly inserted
+					afterwards */
+	const dtuple_t**vrow,		/*!< out: dtuple to hold old virtual
+					column data */
+	mtr_t*		mtr)		/*!< in: mtr */
+{
+	dberr_t		err;
+	/* Recycle (or lazily create) the per-prebuilt heap that is handed to the callee for *old_vers. */
+	if (prebuilt->old_vers_heap) {
+		mem_heap_empty(prebuilt->old_vers_heap);
+	} else {
+		prebuilt->old_vers_heap = mem_heap_create(200);
+	}
+
+	err = row_vers_build_for_flashback_point_read(
+		rec, mtr, clust_index, offsets, read_view, offset_heap,
+		prebuilt->old_vers_heap, old_vers, vrow, prebuilt);
+	return(err);
+}
+
+/*********************************************************************//**
+Builds previous versions of a clustered index record for a flashback range read
+@return DB_SUCCESS or DB_MISSING_HISTORY, as returned by row_vers_build_for_flashback_range_read() */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_sel_build_prev_vers_for_flashback_range(
+/*==============================*/
+	ReadView*	read_view,/*!< in: consistent read view */
+	dict_index_t*	clust_index,	/*!< in: clustered index */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: record in a clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
+					rec_get_offsets(rec, clust_index) */
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
+					the offsets are allocated */
+	rec_t**		old_vers,	/*!< out: old version, or NULL if the
+					record does not exist in the view:
+					i.e., it was freshly inserted
+					afterwards */
+	const dtuple_t**vrow,		/*!< out: dtuple to hold old virtual
+					column data */
+	mtr_t*		mtr,		/*!< in: mtr */
+	byte*		&buf,		/*!< in/out: buffer for fetched row
+					in MySQL format */
+	byte*		&next_buf)	/*!< in/out: alias of buf, or
+					returned by row_sel_fetch_last_buf */
+{
+	DBUG_ENTER("row_sel_build_prev_vers_for_flashback_range");
+
+	dberr_t		err;
+	/* Recycle (or lazily create) the per-prebuilt heap that is handed to the callee for *old_vers. */
+	if (prebuilt->old_vers_heap) {
+		mem_heap_empty(prebuilt->old_vers_heap);
+	} else {
+		prebuilt->old_vers_heap = mem_heap_create(200);
+	}
+
+	err = row_vers_build_for_flashback_range_read(
+		rec, mtr, clust_index, offsets, read_view,
+		offset_heap, prebuilt->old_vers_heap,
+		old_vers, vrow, prebuilt, buf, next_buf);
+
+	DBUG_RETURN(err);
+}
+
+/*********************************************************************//**
+Builds previous versions of a clustered index record for a flashback trx_id read
+@return the dberr_t returned by row_vers_build_for_flashback_trx_id_read() */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+row_sel_build_prev_vers_for_flashback_trx_id(
+/*==============================*/
+	ReadView*	read_view,	/*!< in: consistent read view */
+	dict_index_t*	clust_index,	/*!< in: clustered index */
+	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct */
+	const rec_t*	rec,		/*!< in: record in a clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
+					rec_get_offsets(rec, clust_index) */
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
+					the offsets are allocated */
+	rec_t**		old_vers,	/*!< out: old version, or NULL if the
+					record does not exist in the view:
+					i.e., it was freshly inserted
+					afterwards */
+	const dtuple_t**vrow,		/*!< out: dtuple to hold old virtual
+					column data */
+	mtr_t*		mtr,		/*!< in: mtr */
+	byte*		&buf,		/*!< in/out: buffer for fetched row
+					in MySQL format */
+	byte*		&next_buf)	/*!< in/out: alias of buf, or
+					returned by row_sel_fetch_last_buf */
+{
+	dberr_t		err;
+	/* Recycle (or lazily create) the per-prebuilt heap that is handed to the callee for *old_vers. */
+	if (prebuilt->old_vers_heap) {
+		mem_heap_empty(prebuilt->old_vers_heap);
+	} else {
+		prebuilt->old_vers_heap = mem_heap_create(200);
+	}
+
+	err = row_vers_build_for_flashback_trx_id_read(
+		rec, mtr, clust_index, offsets, read_view, offset_heap,
+		prebuilt->old_vers_heap, old_vers, vrow, prebuilt, buf,
+		next_buf);
+	return(err);
+}
+
+/*********************************************************************//**
 Retrieves the clustered index record corresponding to a record in a non-clustered index. Does the necessary locking. Used in the MySQL interface. @@ -5049,6 +5175,18 @@ wait_table_again: &same_user_rec, BTR_SEARCH_LEAF, pcur, moves_up, &mtr); + //Temporal query fetching multiple versions on original table. + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_ORIG_TABLE) && + prebuilt->t_hint && + (prebuilt->t_hint->type == FROM_TO_HINT || + prebuilt->t_hint->type == TRX_ID_HINT) && + !need_to_process && + pcur->rel_pos == BTR_PCUR_ON && + prebuilt->n_transitional_vers_fetched) { + + need_to_process = TRUE; + } + if (UNIV_UNLIKELY(need_to_process)) { if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { @@ -5136,6 +5274,7 @@ wait_table_again: } } } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_L) { + btr_pcur_open_at_index_side( mode == PAGE_CUR_G, index, BTR_SEARCH_LEAF, pcur, false, 0, &mtr); @@ -5592,34 +5731,149 @@ no_gap_lock: high force recovery level set, we try to avoid crashes by skipping this lookup */ - if (srv_force_recovery < 5 - && !lock_clust_rec_cons_read_sees( - rec, index, offsets, - trx_get_read_view(trx))) { - - rec_t* old_vers; - /* The following call returns 'offsets' - associated with 'old_vers' */ - err = row_sel_build_prev_vers_for_mysql( - trx->read_view, clust_index, - prebuilt, rec, &offsets, &heap, - &old_vers, need_vrow ? 
&vrow : NULL, - &mtr); + if (srv_force_recovery < 5) { - if (err != DB_SUCCESS) { + /* Flashback */ + if (!prebuilt->t_hint) { - goto lock_wait_or_error; - } + if(!lock_clust_rec_cons_read_sees( + rec, index, offsets, + trx_get_read_view(trx))) { + + rec_t* old_vers; + /* The following call returns 'offsets' + associated with 'old_vers' */ + err = row_sel_build_prev_vers_for_mysql( + trx->read_view, clust_index, + prebuilt, rec, &offsets, &heap, + &old_vers, need_vrow ? &vrow : NULL, + &mtr); + + if (err != DB_SUCCESS) { + + goto lock_wait_or_error; + } + + if (old_vers == NULL) { + /* The row did not exist yet in + the read view */ - if (old_vers == NULL) { - /* The row did not exist yet in - the read view */ + goto next_rec; + } - goto next_rec; - } + rec = old_vers; + prev_rec = rec; + } - rec = old_vers; - prev_rec = rec; + } else if (prebuilt->t_hint && prebuilt->t_hint->type == + Temporal_hint_type::AS_OF_HINT) { + + if (prebuilt->table == prebuilt->index->table) { + + if (!lock_clust_rec_flashback_point_read_sees( + rec, index, offsets, trx->read_view, prebuilt)) { + + rec_t* old_vers; + /* The following call returns 'offsets' + associated with 'old_vers' */ + err = row_sel_build_prev_vers_for_flashback_point( + trx->read_view, clust_index, + prebuilt, rec, &offsets, &heap, + &old_vers, need_vrow ? 
&vrow : NULL, + &mtr); + + if (err == DB_MISSING_HISTORY) { + goto next_rec; + } + + if (err != DB_SUCCESS) { + + goto lock_wait_or_error; + } + + if (old_vers == NULL) { + /* The row did not exist yet in + the read view */ + + goto next_rec; + } + + rec = old_vers; + prev_rec = rec; + } + + } else { + if (!lock_clust_rec_flashback_point_read_sees( + rec, index, offsets, trx->read_view, prebuilt)) { + goto next_rec; + } + } + + } else if (prebuilt->t_hint && prebuilt->t_hint->type == + Temporal_hint_type::FROM_TO_HINT) { + + rec_t* old_vers = NULL; + /* The following call returns 'offsets' + associated with 'old_vers' */ + err = row_sel_build_prev_vers_for_flashback_range( + trx->read_view, clust_index, + prebuilt, rec, &offsets, &heap, + &old_vers, need_vrow ? &vrow : NULL, + &mtr, buf, next_buf); + + if (err != DB_SUCCESS && err != DB_MISSING_HISTORY) { + + goto lock_wait_or_error; + } + + /* Cache is full. */ + const auto max_rows_to_cache = record_buffer ? + record_buffer->max_records() : MYSQL_FETCH_CACHE_SIZE; + if (prebuilt->n_fetch_cached >= max_rows_to_cache) + goto idx_cond_failed; + + if (old_vers == NULL) { + /* The row did not exist yet in + the read view */ + + goto next_rec; + } + + rec = old_vers; + prev_rec = rec; + + } else if (prebuilt->t_hint && prebuilt->t_hint->type == + Temporal_hint_type::TRX_ID_HINT) { + rec_t* old_vers = NULL; + /* The following call returns 'offsets' + associated with 'old_vers' */ + err = row_sel_build_prev_vers_for_flashback_trx_id( + trx->read_view, clust_index, + prebuilt, rec, &offsets, &heap, + &old_vers, need_vrow ? &vrow : NULL, + &mtr, buf, next_buf); + + if (err != DB_SUCCESS && err != DB_MISSING_HISTORY) { + + goto lock_wait_or_error; + } + + /* Cache is full. */ + const auto max_rows_to_cache = record_buffer ? 
+ record_buffer->max_records() : MYSQL_FETCH_CACHE_SIZE; + if (prebuilt->n_fetch_cached >= max_rows_to_cache) + goto idx_cond_failed; + + if (old_vers == NULL) { + /* The row did not exist yet in + the read view */ + + goto next_rec; + } + + rec = old_vers; + prev_rec = rec; + } } } else { /* We are looking into a non-clustered index, @@ -5828,6 +6082,16 @@ requires_clust_rec: See ha_innobase::ha_is_record_buffer_wanted(). */ ut_ad(prebuilt->can_prefetch_records() || record_buffer == nullptr); + /* No need to cache result_rec in record buffer, + nor copy result_rec to buf. */ + if (prebuilt->t_hint && + (prebuilt->t_hint->type == Temporal_hint_type::FROM_TO_HINT || + prebuilt->t_hint->type == Temporal_hint_type::TRX_ID_HINT)) { + + err = DB_SUCCESS; + goto idx_cond_failed; + } + /* Decide whether to prefetch extra rows. At this point, the clustered index record is protected by a page latch that was acquired when pcur was positioned. @@ -6088,6 +6352,21 @@ next_rec: } if (moves_up) { + + //Temporal query fetching multiple versions on original table. + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_ORIG_TABLE) && + prebuilt->t_hint && + (prebuilt->t_hint->type == FROM_TO_HINT || + prebuilt->t_hint->type == TRX_ID_HINT)) { + + //More versions to fetch, dont move persist cursor. + if (prebuilt->n_transitional_vers_fetched) { + btr_pcur_store_position(pcur, &mtr); + + goto normal_return; + } + } + bool move; if (spatial_search) { @@ -6720,3 +6999,52 @@ row_search_index_stats( mem_heap_free(heap); return(false); } + +/** Flashback +Cache old version into the query buffer. 
+@param[in]	prebuilt	prebuilt structure
+@param[in]	rec		Innobase record in the index
+				which was described in prebuilt's
+				template, or in the clustered index;
+				must be protected by a page latch
+@param[in]	vrow		virtual columns
+@param[in]	index		index of rec
+@param[in]	offsets		array returned by rec_get_offsets(rec)
+@param[in,out]	buf		buffer for fetched row in MySQL format
+@param[in,out]	next_buf	alias of buf, or returned by row_sel_fetch_last_buf
+@return true on success, false if the cache is full or the row could not be stored */
+ibool
+row_sel_flashback_cache_mysql_rec(
+	row_prebuilt_t*		prebuilt,
+	const rec_t*		rec,
+	const dtuple_t*		vrow,
+	const dict_index_t*	index,
+	const ulint*		offsets,
+	byte*			&buf,
+	byte*			&next_buf)
+{
+	DBUG_ENTER("row_sel_flashback_cache_mysql_rec");
+
+	const auto record_buffer = row_sel_get_record_buffer(prebuilt);
+	const auto max_rows_to_cache = record_buffer ?
+		record_buffer->max_records() : MYSQL_FETCH_CACHE_SIZE;
+
+	//No space in cache.
+	if (prebuilt->n_fetch_cached >= max_rows_to_cache) {
+
+		DBUG_RETURN(false);
+	}
+	/* While next_buf is still null the row goes straight into buf; afterwards into the slot from row_sel_fetch_last_buf(). */
+	next_buf = next_buf ? row_sel_fetch_last_buf(prebuilt) : buf;
+
+	if (!row_sel_store_mysql_rec(next_buf, prebuilt,
+			rec, vrow, true, index, offsets, false)) {
+
+		DBUG_RETURN(false);
+	}
+	/* Only a row written to a slot other than buf is enqueued in the fetch cache. */
+	if (next_buf != buf)
+		row_sel_enqueue_cache_row_for_mysql(next_buf, prebuilt);
+
+	DBUG_RETURN(true);
+}
diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0umod.cc mysql-8.0.3-rc/storage/innobase/row/row0umod.cc --- mysql-8.0.3-rc/storage/innobase/row/row0umod.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0umod.cc 2020-04-19 23:52:38.854543863 +0800 @@ -1109,6 +1109,7 @@ row_undo_mod_parse_undo_rec( table_id_t table_id; trx_id_t trx_id; roll_ptr_t roll_ptr; + trx_id_t end_trx_id; ulint info_bits; ulint type; ulint cmpl_info; @@ -1156,13 +1157,13 @@ row_undo_mod_parse_undo_rec( clust_index = node->table->first_index(); ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); + &end_trx_id, &info_bits); ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), node->heap); ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, - roll_ptr, info_bits, node->trx, + roll_ptr, end_trx_id, info_bits, node->trx, node->heap, &(node->update)); node->new_trx_id = trx_id; node->cmpl_info = cmpl_info; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0upd.cc mysql-8.0.3-rc/storage/innobase/row/row0upd.cc --- mysql-8.0.3-rc/storage/innobase/row/row0upd.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0upd.cc 2020-04-19 23:52:38.850543863 +0800 @@ -2898,6 +2898,11 @@ row_upd_del_mark_clust_rec( err = btr_cur_del_mark_set_clust_rec( flags, btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), index, offsets, thr, node->row, mtr); + + /* Flashback: Synchronized purge data page, but atomicity is not guaranteed */ + if (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_HIST_TABLE)) + 
page_cur_delete_rec(btr_cur_get_page_cur(btr_cur), index, offsets, mtr); + if (err == DB_SUCCESS && referenced) { /* NOTE that the following call loses the position of pcur ! */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/row/row0vers.cc mysql-8.0.3-rc/storage/innobase/row/row0vers.cc --- mysql-8.0.3-rc/storage/innobase/row/row0vers.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/row/row0vers.cc 2020-05-03 15:43:50.154695927 +0800 @@ -40,6 +40,7 @@ Created 2/6/1997 Heikki Tuuri #include "row0mysql.h" #include "row0row.h" #include "row0upd.h" +#include "row0sel.h" #include "row0vers.h" #include "trx0purge.h" #include "trx0rec.h" @@ -1228,6 +1229,610 @@ row_vers_build_for_consistent_read( } mem_heap_free(heap); + + return(err); +} + +/*****************************************************************//** +Constructs the version of a clustered index record which a consistent +read should see. We assume that the trx id stored in rec is such that +the consistent read should not see rec in its present version. 
+@return DB_SUCCESS or DB_MISSING_HISTORY */
+dberr_t
+row_vers_build_for_flashback_point_read(
+/*===============================*/
+	const rec_t*	rec,	/*!< in: record in a clustered index; the
+				caller must have a latch on the page; this
+				latch locks the top of the stack of versions
+				of this records */
+	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
+	dict_index_t*	index,	/*!< in: the clustered index */
+	ulint**		offsets,/*!< in/out: offsets returned by
+				rec_get_offsets(rec, index) */
+	ReadView*	view,	/*!< in: read view (not referenced in the body) */
+	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
+				the offsets are allocated */
+	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
+				*old_vers is allocated; memory for possible
+				intermediate versions is allocated and freed
+				locally within the function */
+	rec_t**		old_vers,/*!< out, own: old version, or NULL
+				if the history is missing or the record
+				does not exist in the view, that is,
+				it was freshly inserted afterwards */
+	const dtuple_t**vrow,	/*!< out: virtual row */
+	row_prebuilt_t*	prebuilt)/*!< in: current prebuilt; supplies trx and the AS OF value */
+{
+	const rec_t*	version;
+	rec_t*		prev_version;
+	trx_id_t	trx_id;
+	trx_id_t	end_trx_id;
+	mem_heap_t*	heap = NULL;
+	byte*		buf;
+	dberr_t		err;
+
+	ut_ad(index->is_clustered());
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
+
+	ut_ad(rec_offs_validate(rec, index, *offsets));
+
+	trx_id = row_get_rec_trx_id(rec, index, *offsets);
+
+	ut_ad(!vrow || !(*vrow));
+
+	version = rec;
+	*old_vers = NULL;	/* defined result even if the loop stops on missing history */
+	ulint	status = 0;
+
+	for (;;) {
+		mem_heap_t*	prev_heap = heap;
+
+		heap = mem_heap_create(1024);
+
+		if (vrow) {
+			*vrow = NULL;
+		}
+
+		/* If purge can't see the record then we can't rely on
+		the UNDO log record. */
+
+		bool	purge_sees = trx_undo_prev_version_build(
+			rec, mtr, version, index, *offsets, heap,
+			&prev_version, NULL, vrow, status);
+
+		err = (purge_sees) ? DB_SUCCESS : DB_MISSING_HISTORY;
+
+		if (prev_heap != NULL) {
+			mem_heap_free(prev_heap);
+		}
+
+		if (prev_version == NULL) {
+			/* It was a freshly inserted version */
+			*old_vers = NULL;
+			ut_ad(!vrow || !(*vrow));
+			break;
+		}
+
+		if (err == DB_MISSING_HISTORY) {
+			break;
+		}
+
+		*offsets = rec_get_offsets(
+			prev_version, index, *offsets, ULINT_UNDEFINED,
+			offset_heap);
+
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
+		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
+		end_trx_id = row_get_rec_end_trx_id(prev_version, index, *offsets);
+
+		bool flag = false;
+		trx_id_t max_trx_id = trx_sys_get_max_trx_id();
+
+		tlog_t tlog = read_tlog_by_trx_id(trx_id, max_trx_id);
+
+		if ((trx_id == prebuilt->trx->id ||
+		     tlog.status != TLOG_TRX_STATUS_IN_PROGRESS)
+		    && tlog.status != TLOG_TRX_STATUS_ABORT) {
+			if (prebuilt->t_hint->value.as_of_value >= tlog.finish_time.tv_sec) {
+				if (end_trx_id != END_TRX_ID_MAX_VALUE) {
+					tlog_t end_tlog = read_tlog_by_trx_id(end_trx_id, max_trx_id);
+					if (end_tlog.status == TLOG_TRX_STATUS_IN_PROGRESS
+					    || end_tlog.finish_time.tv_sec > prebuilt->t_hint->value.as_of_value) {
+						flag = true;
+					}
+				} else {
+					flag = true;
+				}
+			}
+		}
+
+		if (flag) {
+			/* This version was finished at or before the AS OF value and not
+			yet ended at that time: copy it to in_heap and return */
+
+			buf = static_cast<byte*>(
+				mem_heap_alloc(
+					in_heap, rec_offs_size(*offsets)));
+
+			*old_vers = rec_copy(buf, prev_version, *offsets);
+
+			if (vrow && *vrow) {
+				*vrow = dtuple_copy(*vrow, in_heap);
+				dtuple_dup_v_fld(*vrow, in_heap);
+			}
+			break;
+		}
+
+		version = prev_version;
+	}
+
+	if (*old_vers != NULL) {
+		rec_offs_make_valid(*old_vers, index, *offsets);
+	}
+
+	mem_heap_free(heap);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Flashback
+Collects the versions of a clustered index record that qualify for a flashback
+from..to.. read, caching each non-delete-marked one via row_sel_flashback_cache_mysql_rec().
+@return DB_SUCCESS or DB_MISSING_HISTORY */
+dberr_t
+row_vers_build_for_flashback_range_read(
+/*===============================*/
+	const rec_t*	rec,		/*!< in: record in a clustered index; the
+					caller must have a latch on the page; this
+					latch locks the top of the stack of versions
+					of this records */
+	mtr_t*		mtr,		/*!< in: mtr holding the latch on rec */
+	dict_index_t*	index,		/*!< in: the clustered index */
+	ulint**		offsets,	/*!< in/out: offsets returned by
+					rec_get_offsets(rec, index) */
+	ReadView*	view,		/*!< in: read view (not referenced in the body) */
+	mem_heap_t**	offset_heap,	/*!< in/out: memory heap from which
+					the offsets are allocated */
+	mem_heap_t*	in_heap,	/*!< in: memory heap from which the memory for
+					*old_vers is allocated; memory for possible
+					intermediate versions is allocated and freed
+					locally within the function */
+	rec_t**		old_vers,	/*!< in/out, own: must be NULL on entry; set to
+					the first qualifying version copied by this
+					call, left NULL if none qualified or that
+					version was already fetched by an earlier call */
+	const dtuple_t**vrow,		/*!< out: virtual row */
+	row_prebuilt_t*	prebuilt,	/*!< in/out: current prebuilt; n_transitional_vers_fetched is updated */
+	byte*		&mysql_rec,	/*!< in/out: buffer for fetched row
+					in MySQL format */
+	byte*		&next_buf)	/*!< in/out: alias of mysql_rec, or
+					returned by row_sel_fetch_last_buf */
+{
+	const rec_t*	version;
+	rec_t*		prev_version;
+	trx_id_t	trx_id;
+	trx_id_t	end_trx_id;
+	trx_id_t	max_trx_id;
+	mem_heap_t*	heap = NULL;
+	byte*		buf;
+	dberr_t		err = DB_SUCCESS;
+	bool		flag = false;
+	ulint		n_vers_fetched = 0; //The number of fetched versions
+
+	max_trx_id = trx_sys_get_max_trx_id();
+
+	ut_ad(index->is_clustered());
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S));
+
+	ut_ad(rec_offs_validate(rec, index, *offsets));
+
+	trx_id = row_get_rec_trx_id(rec, index, *offsets);
+	end_trx_id = row_get_rec_end_trx_id(rec, index, *offsets);
+
+	ut_ad(!vrow || !(*vrow));
+
+	version = rec;
+
+	tlog_t tlog = read_tlog_by_trx_id(trx_id, max_trx_id);
+	if ((trx_id == prebuilt->trx->id
+	     || tlog.status != TLOG_TRX_STATUS_IN_PROGRESS)
+	    && tlog.status != TLOG_TRX_STATUS_ABORT) {
+		if (prebuilt->t_hint->value.from_to_value[1] >=
+		    tlog.finish_time.tv_sec) {
+			if (end_trx_id != END_TRX_ID_MAX_VALUE) {
+				tlog_t end_tlog = read_tlog_by_trx_id(end_trx_id, max_trx_id);
+				if (end_tlog.status == TLOG_TRX_STATUS_IN_PROGRESS
+				    || end_tlog.finish_time.tv_sec >
+				    prebuilt->t_hint->value.from_to_value[0]) {
+					flag = true;
+				}
+			} else {
+				flag = true;
+			}
+		}
+	}
+
+	if (flag) {
+
+		++n_vers_fetched;
+
+		if (!prebuilt->n_transitional_vers_fetched) {
+
+			/* The version has not been fetched. */
+			buf = static_cast<byte*>(
+				mem_heap_alloc(
+					in_heap, rec_offs_size(*offsets)));
+
+			*old_vers = rec_copy(buf, version, *offsets);
+
+			if (vrow && *vrow) {
+				*vrow = dtuple_copy(*vrow, in_heap);
+				dtuple_dup_v_fld(*vrow, in_heap);
+			}
+
+			/* Cache version in record buffer. */
+			/* No need to cache the delete marked version,
+			otherwise duplicated rows will be shown. */
+			if (!rec_get_deleted_flag(version, page_rec_is_comp(version)) &&
+			    !row_sel_flashback_cache_mysql_rec(prebuilt,
+					version, NULL, index, *offsets, mysql_rec,
+					next_buf)) {
+
+				/* Cache failed. */
+				if (*old_vers != NULL)
+					rec_offs_make_valid(*old_vers, index, *offsets);
+
+				if (heap != NULL)
+					mem_heap_free(heap);
+
+				return(err);
+			}
+
+			++prebuilt->n_transitional_vers_fetched;
+		}
+	}
+
+	ulint	status = 0;
+
+	for (;;) {
+		mem_heap_t*	prev_heap = heap;
+
+		heap = mem_heap_create(1024);
+
+		if (vrow) {
+			*vrow = NULL;
+		}
+
+		/* If purge can't see the record then we can't rely on
+		the UNDO log record. */
+
+		bool	purge_sees = trx_undo_prev_version_build(
+			rec, mtr, version, index, *offsets, heap,
+			&prev_version, NULL, vrow, status);
+
+		err = (purge_sees) ? DB_SUCCESS : DB_MISSING_HISTORY;
+
+		if (prev_heap != NULL) {
+			mem_heap_free(prev_heap);
+		}
+
+		if (prev_version == NULL) {
+
+			// All versions are fetched, reset to 0.
+			prebuilt->n_transitional_vers_fetched = 0;
+
+			// It was a freshly inserted version
+			ut_ad(!vrow || !(*vrow));
+			break;
+		}
+
+		if (err == DB_MISSING_HISTORY) {
+
+			break;
+		}
+
+		*offsets = rec_get_offsets(
+			prev_version, index, *offsets, ULINT_UNDEFINED,
+			offset_heap);
+		ut_ad(rec_offs_validate(prev_version, index, *offsets));
+
+		++n_vers_fetched;
+
+		if (n_vers_fetched <= prebuilt->n_transitional_vers_fetched) {
+
+			/* prev_version has already been fetched when
+			row_vers_build_for_flashback_range_read
+			is called previously. */
+			version = prev_version;
+
+			continue;
+		}
+
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
+#endif // UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG
+
+		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
+		end_trx_id = row_get_rec_end_trx_id(prev_version, index, *offsets);
+
+		flag = false;
+
+		tlog_t tlog = read_tlog_by_trx_id(trx_id, max_trx_id);
+		if ((trx_id == prebuilt->trx->id
+		     || tlog.status != TLOG_TRX_STATUS_IN_PROGRESS)
+		    && tlog.status != TLOG_TRX_STATUS_ABORT) {
+			if (prebuilt->t_hint->value.from_to_value[1] >=
+			    tlog.finish_time.tv_sec) {
+				if (end_trx_id != END_TRX_ID_MAX_VALUE) {
+					tlog_t end_tlog = read_tlog_by_trx_id(end_trx_id, max_trx_id);
+					if (end_tlog.status == TLOG_TRX_STATUS_IN_PROGRESS
+					    || end_tlog.finish_time.tv_sec >
+					    prebuilt->t_hint->value.from_to_value[0]) {
+
+						flag = true;
+					}
+				} else {
+
+					flag = true;
+				}
+			}
+		}
+
+		if (flag) {
+
+			if (*old_vers == NULL) {
+				/* No version handed back yet in this call: test the slot, not the out-pointer itself (which is never NULL). */
+				buf = static_cast<byte*>(
+					mem_heap_alloc(
+						in_heap, rec_offs_size(*offsets)));
+
+				*old_vers = rec_copy(buf, prev_version, *offsets);
+
+				if (vrow && *vrow) {
+					*vrow = dtuple_copy(*vrow, in_heap);
+					dtuple_dup_v_fld(*vrow, in_heap);
+				}
+			}
+
+			/* Cache version in record buffer */
+			/* No need to cache the delete marked version,
+			otherwise duplicated rows will be shown. */
+			if (!rec_get_deleted_flag(prev_version, rec_offs_comp(*offsets)) &&
+			    !row_sel_flashback_cache_mysql_rec(prebuilt,
+					prev_version, NULL, index, *offsets,
+					mysql_rec, next_buf)) {
+				/* NOTE(review): if the counter is unsigned and still 0 here this wraps around -- confirm. */
+				--prebuilt->n_transitional_vers_fetched;
+
+				break;
+			}
+		}
+
+		++prebuilt->n_transitional_vers_fetched;
+
+		version = prev_version;
+	}
+
+	if (*old_vers != NULL) {
+		rec_offs_make_valid(*old_vers, index, *offsets);
+	}
+
+	if (heap != NULL)
+		mem_heap_free(heap);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Flashback
+Constructs versions of a clustered index record which a flashback trx_id
+read should see. We will store the subsequent versions into cache first.
+@return DB_SUCCESS or DB_MISSING_HISTORY */ +dberr_t +row_vers_build_for_flashback_trx_id_read( +/*===============================*/ + const rec_t* rec, /*!< in: record in a clustered index; the + caller must have a latch on the page; this + latch locks the top of the stack of versions + of this records */ + mtr_t* mtr, /*!< in: mtr holding the latch on rec */ + dict_index_t* index, /*!< in: the clustered index */ + ulint** offsets, /*!< in/out: offsets returned by + rec_get_offsets(rec, index) */ + ReadView* view, /*!< in: the consistent read view */ + mem_heap_t** offset_heap, /*!< in/out: memory heap from which + the offsets are allocated */ + mem_heap_t* in_heap, /*!< in: memory heap from which the memory for + *old_vers is allocated; memory for possible + intermediate versions is allocated and freed + locally within the function */ + rec_t** old_vers, /*!< out, own: old version, or NULL + if the history is missing or the record + does not exist in the view, that is, + it was freshly inserted afterwards */ + const dtuple_t**vrow, /*!< out: virtual row */ + row_prebuilt_t* prebuilt, /*!< in: current prebuilt */ + byte* &mysql_rec, /*!< in/out: buffer for fetched row + in MySQL format */ + byte* &next_buf) /*!< in/out: alias of mysql_rec, or + returned by row_sel_fetch_last_buf */ +{ + const rec_t* version; + rec_t* prev_version; + trx_id_t trx_id; + trx_id_t end_trx_id; + mem_heap_t* heap = NULL; + byte* buf; + dberr_t err = DB_SUCCESS; + ulint n_vers_fetched = 0; //The number of fetched versions + + ut_ad(index->is_clustered()); + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) + || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); + ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_S)); + + ut_ad(rec_offs_validate(rec, index, *offsets)); + + trx_id = row_get_rec_trx_id(rec, index, *offsets); + end_trx_id = row_get_rec_end_trx_id(rec, index, *offsets); + + ut_ad(!vrow || !(*vrow)); + + version = rec; + + if (view->changes_visible(trx_id, 
index->table->name) + && (prebuilt->t_hint->value.trx_id_value == (ulonglong)trx_id + || prebuilt->t_hint->value.trx_id_value == (ulonglong)end_trx_id)) { + + ++n_vers_fetched; + + if (!prebuilt->n_transitional_vers_fetched) { + + /* The version has not been fetched. */ + buf = static_cast( + mem_heap_alloc( + in_heap, rec_offs_size(*offsets))); + + *old_vers = rec_copy(buf, version, *offsets); + + if (vrow && *vrow) { + *vrow = dtuple_copy(*vrow, in_heap); + dtuple_dup_v_fld(*vrow, in_heap); + } + + /* Cache version in record buffer. */ + /* No need to cache the delete marked version, + otherwise duplicated rows will be shown. */ + if (!rec_get_deleted_flag(version, + page_rec_is_comp(version)) && + !row_sel_flashback_cache_mysql_rec(prebuilt, + version, NULL, index, *offsets, mysql_rec, + next_buf)) { + + /* Cache failed */ + if (*old_vers != NULL) + rec_offs_make_valid(*old_vers, index, *offsets); + + if (heap != NULL) + mem_heap_free(heap); + + return(err); + } + + ++prebuilt->n_transitional_vers_fetched; + } + } + + for (;;) { + mem_heap_t* prev_heap = heap; + + heap = mem_heap_create(1024); + + if (vrow) { + *vrow = NULL; + } + + /* If purge can't see the record then we can't rely on + the UNDO log record. */ + + bool purge_sees = trx_undo_prev_version_build( + rec, mtr, version, index, *offsets, heap, + &prev_version, NULL, vrow, 0); + + err = (purge_sees) ? DB_SUCCESS : DB_MISSING_HISTORY; + + if (prev_heap != NULL) { + mem_heap_free(prev_heap); + } + + if (prev_version == NULL) { + + /* All versions are fetched, reset to 0. 
*/ + prebuilt->n_transitional_vers_fetched = 0; + + /* It was a freshly inserted version */ + ut_ad(!vrow || !(*vrow)); + break; + } + + if (err == DB_MISSING_HISTORY) { + break; + } + + *offsets = rec_get_offsets( + prev_version, index, *offsets, ULINT_UNDEFINED, + offset_heap); + + ++n_vers_fetched; + + if (n_vers_fetched <= prebuilt->n_transitional_vers_fetched) { + + /* prev_version has already been fetched when + row_vers_build_for_flashback_trx_id_read + is called previously.*/ + version = prev_version; + + continue; + } + +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!rec_offs_any_null_extern(prev_version, *offsets)); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ + + trx_id = row_get_rec_trx_id(prev_version, index, *offsets); + end_trx_id = row_get_rec_end_trx_id(prev_version, index, *offsets); + + if (view->changes_visible(trx_id, index->table->name) + && (prebuilt->t_hint->value.trx_id_value == (ulonglong)trx_id + || prebuilt->t_hint->value.trx_id_value == (ulonglong)end_trx_id)) { + + if (old_vers == NULL) { + /* The version has not been fetched. */ + buf = static_cast( + mem_heap_alloc( + in_heap, rec_offs_size(*offsets))); + + *old_vers = rec_copy(buf, prev_version, *offsets); + + if (vrow && *vrow) { + *vrow = dtuple_copy(*vrow, in_heap); + dtuple_dup_v_fld(*vrow, in_heap); + } + } + + /* Cache version in record buffer. */ + /* No need to cache the delete marked version, + otherwise duplicated rows will be shown. 
*/ + if (!rec_get_deleted_flag(prev_version, + page_rec_is_comp(prev_version)) && + !row_sel_flashback_cache_mysql_rec(prebuilt, + prev_version, NULL, index, *offsets, + mysql_rec, next_buf)) { + + --prebuilt->n_transitional_vers_fetched; + + break; + } + } + + ++prebuilt->n_transitional_vers_fetched; + + version = prev_version; + } + + if (*old_vers != NULL) { + rec_offs_make_valid(*old_vers, index, *offsets); + } + + mem_heap_free(heap); return(err); } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/srv/srv0srv.cc mysql-8.0.3-rc/storage/innobase/srv/srv0srv.cc --- mysql-8.0.3-rc/storage/innobase/srv/srv0srv.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/srv/srv0srv.cc 2020-04-19 23:52:38.838543864 +0800 @@ -72,6 +72,9 @@ Created 10/8/1995 Heikki Tuuri #include "usr0sess.h" #include "ut0crc32.h" #include "ut0mem.h" +#include "tlog0tlog.h" + + #ifdef INNODB_DD_TABLE /* true when upgrading. */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/srv/srv0start.cc mysql-8.0.3-rc/storage/innobase/srv/srv0start.cc --- mysql-8.0.3-rc/storage/innobase/srv/srv0start.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/srv/srv0start.cc 2020-04-19 23:52:38.838543864 +0800 @@ -78,6 +78,7 @@ Created 2/16/1996 Heikki Tuuri #include "trx0sys.h" #include "trx0trx.h" #include "ut0mem.h" +#include "tlog0tlog.h" #ifndef UNIV_HOTBACKUP # include @@ -2049,6 +2050,7 @@ srv_start(bool create_new_db, const char recv_sys_init(buf_pool_get_curr_size()); trx_sys_create(); lock_sys_create(srv_lock_table_size); + make_tlog(); srv_start_state_set(SRV_START_STATE_LOCK_SYS); /* Create i/o-handler threads: */ @@ -3140,6 +3142,9 @@ srv_shutdown() /* 7. Free the synchronisation infrastructure. 
*/ sync_check_close(); + /* Flashback*/ + free_tlog(); + if (srv_print_verbose_log) { ib::info() << "Shutdown completed; log sequence number " << srv_shutdown_lsn; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/tlog/tlog0lru.cc mysql-8.0.3-rc/storage/innobase/tlog/tlog0lru.cc --- mysql-8.0.3-rc/storage/innobase/tlog/tlog0lru.cc 1970-01-01 08:00:00.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/tlog/tlog0lru.cc 2020-05-03 16:33:23.314542590 +0800 @@ -0,0 +1,465 @@ +#include +#include +#include +#include + +#include "tlog0lru.h" +#include "sync0sync.h" +#include "sync0debug.h" +#include "univ.i" +#include "sql/log.h" + +CachePage::CachePage(const unsigned int& page_size) : + k_size(page_size), buffer(new char[page_size]()), dirty(false) +{ + /* init page lock */ + pthread_rwlock_init(&lock, NULL); +} + +CachePage::~CachePage() +{ + pthread_rwlock_destroy(&lock); + delete[] buffer; +} + + +/** Read page from disk. +@return true if read successfully */ +bool +CachePage::load_page_physical( + const std::string& path, + const int& byteoff) +{ + std::ifstream fs(path, std::ifstream::binary | std::ifstream::in); + + if (fs) + { + /* locate the page in fs with get pointer */ + fs.seekg(byteoff, fs.beg); + if (!fs.read(buffer, k_size)) + { + memset(buffer, 0, k_size * sizeof(char)); + } + fs.close(); + } + else + { + memset(buffer, 0, k_size * sizeof(char)); + } + + return true; +} + +/** Write page to disk. 
+@return true if the page was written and synced successfully */ +bool +CachePage::save_page_physical( + const std::string& path, + const int& byteoff) +{ + std::ofstream fs(path, std::ofstream::binary | std::ofstream::out | + std::ofstream::in); + + /* file exists */ + bool suc = fs.is_open(); + + if (!suc) { + /* file does not exist, remove 'in' flag to create it */ + fs.open(path, std::ofstream::binary | std::ofstream::out); + suc = fs.is_open(); + } + + if (suc) { + /* locate the page in fs with put pointer */ + fs.seekp(byteoff, fs.beg); + fs.write(buffer, k_size); + fs.flush(); + suc = fs.good(); /* a failed seek, write or flush means the page was not saved */ + /* helper function to retrieve file descriptor. */ + auto helper = [](std::filebuf& fb) -> int + { + class Helper : public std::filebuf + { + public: + int handle() { return _M_file.fd(); } + }; + + return static_cast<Helper&>(fb).handle(); + }; + + /* sync tlog to disk; skipped when the write already failed, and a failed fsync is reported as a failed save. */ + suc = suc && fsync(helper(*fs.rdbuf())) == 0; + + fs.close(); + } + + return suc; +} + +DynaCachePage::DynaCachePage(const unsigned int& page_size) : + CachePage(page_size), pageno(0), status(TLOG_PAGE_STATUS_EMPTY) {} + +TlogLruCache::TlogLruCache( + const size_t& size, + const std::string& dir, + const std::string& file_title, + const unsigned int& struct_size, + const unsigned int& page_size, + const unsigned int& file_size) + : k_dir(dir), k_file_title(file_title), k_struct_size(struct_size), + k_page_size(page_size), k_struct_per_page(page_size / struct_size), + k_file_size(file_size), k_page_per_file(file_size / page_size) +{ + /* Init cache lock. */ + pthread_rwlock_init(&cache_lock, NULL); + + /* Insert empty pages into linklist. */ + for (size_t i = 0; i != size; ++i) lrulink.emplace_back(k_page_size); + + if (access(k_dir.c_str(), F_OK) != 0) { + pthread_rwlock_wrlock(&cache_lock); + + /* Create directory. The owner needs search (x) permission too, otherwise the tlog files cannot be created inside it. */ + mkdir(k_dir.c_str(), S_IRWXU); + + /* Save first page to disk.
*/ + save_page(zero_page(0)); + + pthread_rwlock_unlock(&cache_lock); + } +} + +TlogLruCache::~TlogLruCache() +{ + pthread_rwlock_destroy(&cache_lock); +} + +/*******************************//** +Save all pages to disk. */ +void +TlogLruCache::flush_pages() +/*=======================*/ +{ + pthread_rwlock_wrlock(&cache_lock); + + for (Pageptr p = lrulink.begin(); p != lrulink.end(); ++p) { + save_page(p); + } + + pthread_rwlock_unlock(&cache_lock); +} + +/************************************************//** +Get the struct pointer on page by struct id, +@return true if successful */ +bool +TlogLruCache::operate_struct_by_id( +/*=============================*/ + unsigned int id, /*!< in: struct id */ + std::function f) /*!< in: function to read or modify values + on page, returns true if page is modified */ +{ + pthread_rwlock_wrlock(&cache_lock); + + Pageptr p = load_page(struct_id_to_pageno(id)); + if (p->status == TLOG_PAGE_STATUS_EMPTY) { + pthread_rwlock_unlock(&cache_lock); + return false; + } + p->dirty |= f(p->buffer + struct_id_to_byte_index(id)); + + pthread_rwlock_unlock(&cache_lock); + return true; +} + +/**********************************************//** +Read the struct pointer on page by struct id. +@return true if successful */ +bool +TlogLruCache::read_struct_by_id( +/*============================*/ + unsigned int id, /*!< in: struct id */ + std::function f) /*!< in: function to read values on page */ +{ + pthread_rwlock_wrlock(&cache_lock); + + Pageptr p = load_page(struct_id_to_pageno(id)); + if (p->status == TLOG_PAGE_STATUS_EMPTY) { + pthread_rwlock_unlock(&cache_lock); + return false; + } + f(p->buffer + struct_id_to_byte_index(id)); + + pthread_rwlock_unlock(&cache_lock); + return true; +} + +/************************************************************//** +Zero a new page if struct id is the first id on the page. 
*/ +void +TlogLruCache::extend_page_by_struct_id( +/*===================================*/ + unsigned int id) /*!< in: struct id */ +{ + /* If not first id on page, return directly. */ + if (struct_id_to_struct_index(id)) return; + + /* Zero page only when id refers to the first struct on the page. */ + pthread_rwlock_wrlock(&cache_lock); + + zero_page(struct_id_to_pageno(id)); + + pthread_rwlock_unlock(&cache_lock); +} + +/** Move the page to the beginning of linklist. */ +inline +void +TlogLruCache::hit_page(Pageptr p) +{ + if (p == lrulink.begin()) return; + lrulink.splice(lrulink.begin(), lrulink, p, std::next(p)); +} + +/** Select the victim page from the end of the linklist, the status of the page +could only be TLOG_PAGE_STATUS_EMPTY or TLOG_PAGE_STATUS_VALID. It changes +nothing except for saving the page to disk. +@return iterator of victim page */ +Pageptr +TlogLruCache::pickup_victim_page() +{ + while (true) { + /* Go through linklist from the end to beginning. */ + Pageptr victim = lrulink.end(); + do { + std::advance(victim, -1); + } while (victim != lrulink.begin() && + victim->status != TLOG_PAGE_STATUS_EMPTY && + victim->status != TLOG_PAGE_STATUS_VALID); + + switch (victim->status) { + case TLOG_PAGE_STATUS_EMPTY: + /* The page is not in use, return it directly. */ + return victim; + + case TLOG_PAGE_STATUS_VALID: + /* The page is in use but valid, return it directly if not + dirty, or we need save the page to disk and continue. */ + if (!victim->dirty) return victim; + save_page(victim); + continue; + + default: + /* All pages loading or saving, wait io of the page which is the + end of linklist, cause it is the least recently used invalid + page. */ + wait_io(std::prev(lrulink.end())); + } + } +} + +/** Switch pageno of page p. */ +inline +void +TlogLruCache::switch_pageno( + Pageptr p, /*!< in/out: the iterator of page */ + int pageno) /*!< in: new pageno */ +{ + /* Remove from map. 
*/ + auto mapped = pageno2page.find(p->pageno); /* unused pages all start with pageno 0, so the entry may belong to another page */ + if (mapped != pageno2page.end() && mapped->second == p) { + pageno2page.erase(mapped); + } + /* Set pageno. */ + p->pageno = pageno; + + /* Map the new pageno to p; operator[] also replaces a stale entry, which insert() would silently keep. */ + pageno2page[pageno] = p; +} + + +/** Load page from cache or disk. Exclusive cache_lock is required. +@param[in] the pageno of page we need +@return the iterator to page loaded */ +Pageptr +TlogLruCache::load_page(const int pageno) +{ + while (true) { + auto it = pageno2page.find(pageno); + if (it != pageno2page.end()) { + /* Page is already in cache. Return if valid or wait IO. */ + switch (it->second->status) { + case TLOG_PAGE_STATUS_LOADING: + case TLOG_PAGE_STATUS_SAVING: + wait_io(it->second); + sql_print_error("wait io:%d", pageno); + continue; + case TLOG_PAGE_STATUS_VALID: + hit_page(it->second); + return it->second; + case TLOG_PAGE_STATUS_EMPTY: + break; + } + } + + /* Page not in cache. We need find a victim. */ + Pageptr victim = pickup_victim_page(); + + switch_pageno(victim, pageno); + victim->dirty = false; + + /* Begin loading, set status. */ + victim->status = TLOG_PAGE_STATUS_LOADING; + + pthread_rwlock_wrlock(&(victim->lock)); + pthread_rwlock_unlock(&cache_lock); + + bool suc = victim->load_page_physical(get_file_path(victim->pageno), + get_byteoff(victim->pageno)); + + pthread_rwlock_wrlock(&cache_lock); + pthread_rwlock_unlock(&(victim->lock)); + + /* Finish loading, restore status or set empty. */ + victim->status = suc ? TLOG_PAGE_STATUS_VALID : TLOG_PAGE_STATUS_EMPTY; + + hit_page(victim); + + return victim; + } +} + +/** Save page to disk. +@param[in,out] iterator of page to save*/ +void +TlogLruCache::save_page(Pageptr p) +{ + int pageno = p->pageno; + + /* Wait while saving. */ + while (pageno == p->pageno && p->status == TLOG_PAGE_STATUS_SAVING) { + wait_io(p); + } + + if (pageno != p->pageno || + !p->dirty || + p->status != TLOG_PAGE_STATUS_VALID) { + return; + } + + /* Begin saving, set status.
*/ + p->status = TLOG_PAGE_STATUS_SAVING; + + pthread_rwlock_wrlock(&(p->lock)); + pthread_rwlock_unlock(&cache_lock); + + bool suc = p->save_page_physical(get_file_path(p->pageno), + get_byteoff(p->pageno)); + + pthread_rwlock_wrlock(&cache_lock); + pthread_rwlock_unlock(&(p->lock)); + + /* Finish saving, restore status. */ + p->status = TLOG_PAGE_STATUS_VALID; + + /* If failed, the page is still dirty. */ + p->dirty = !suc; +} + +/** Set bits of page zero. +@param[in]: pageno of page +@return the page */ +Pageptr +TlogLruCache::zero_page(const int pageno) +{ + /* It must be a new page. */ + Pageptr p = pickup_victim_page(); + + switch_pageno(p, pageno); + + p->status = TLOG_PAGE_STATUS_VALID; + p->dirty = true; + + hit_page(p); + + memset(p->buffer, 0, k_page_size * sizeof(char)); + + return p; +} + +/** Wait page finishing loading or saving. +@param[in,out] iterator of page doing I/O*/ +void +TlogLruCache::wait_io(Pageptr p) +{ + pthread_rwlock_unlock(&cache_lock); + pthread_rwlock_rdlock(&(p->lock)); + pthread_rwlock_unlock(&(p->lock)); + pthread_rwlock_wrlock(&cache_lock); + + if (p->status == TLOG_PAGE_STATUS_LOADING || + p->status == TLOG_PAGE_STATUS_SAVING) { + if (pthread_rwlock_tryrdlock(&p->lock) == 0) { + if (p->status == TLOG_PAGE_STATUS_LOADING) { + p->status = TLOG_PAGE_STATUS_EMPTY; + } else { + p->status = TLOG_PAGE_STATUS_VALID; + p->dirty = true; + } + pthread_rwlock_unlock(&(p->lock)); + } + } +} + +/** Get filename of file contains page. No effects function. 
+@param[in] pageno of page +@return filename */ +inline +std::string +TlogLruCache::get_file_path(const int pageno) const +{ + const int fileno = pageno / k_page_per_file; + return k_dir + '/' + k_file_title + '_' + std::to_string(fileno); +} + +/** +@param[in] pageno of page +@return bytes offset */ +inline +int +TlogLruCache::get_byteoff(const int pageno) const +{ + const int pageoff = pageno % k_page_per_file; + return pageoff * k_page_size; +} + +/** +@param[in] struct id +@return pageno */ +inline +unsigned int +TlogLruCache::struct_id_to_pageno(unsigned int id) const +{ + return id / k_struct_per_page; +} + +/** +@param[in] struct id +@return struct index*/ +inline +unsigned int +TlogLruCache::struct_id_to_struct_index(unsigned int id) const +{ + return id % k_struct_per_page; +} + +/** +@param[in] struct id +@return byte index */ +inline +unsigned int +TlogLruCache::struct_id_to_byte_index(unsigned int id) const +{ + return struct_id_to_struct_index(id) * k_struct_size; +} + diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/tlog/tlog0tlog.cc mysql-8.0.3-rc/storage/innobase/tlog/tlog0tlog.cc --- mysql-8.0.3-rc/storage/innobase/tlog/tlog0tlog.cc 1970-01-01 08:00:00.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/tlog/tlog0tlog.cc 2020-05-03 15:50:42.050680087 +0800 @@ -0,0 +1,289 @@ +#include "tlog0tlog.h" +#include "tlog0lru.h" +#include "trx0sys.h" /* trx_sys_get_max_trx_id */ + +#define SEC_USEC_UNIT 1000000 + +#define TLOG_DIR_NAME "trx_log" /* directory name of tlogs */ +#define TLOG_FILE_TITLE "tlog" /* filename title of each tlog */ +#define LRU_CACHE_NUM 20 +#define LRU_CACHE_SIZE (1 << 12) /* size of lru cache */ +#define TLOG_SIZE_BYTE 16 /* size of each tlog */ +#define PAGE_SIZE_BYTE (1 << 12) /* size of each tlog page */ +#define FILE_SIZE_BYTE (1 << 20) /* size of each tlog file */ +#define TLOG_NUM_PER_PAGE (PAGE_SIZE_BYTE / TLOG_SIZE_BYTE) + +TlogLruCache *tlog_file_caches[LRU_CACHE_NUM]; + 
+size_t struct_item_offset::cur_offset = 0; +/* tlog items */ +const struct_item status_tlog(1, 0); +const struct_item start_time_tlog(7); +const struct_item finish_time_tlog(7); + +/*****************************************************//** +Prepare for tlog. It must be called ONCE when database startup. */ +void +make_tlog() +/*==================*/ +{ + for (auto& tlog_file_cache : tlog_file_caches) { + tlog_file_cache = new TlogLruCache(LRU_CACHE_SIZE, TLOG_DIR_NAME, + TLOG_FILE_TITLE, TLOG_SIZE_BYTE, PAGE_SIZE_BYTE, FILE_SIZE_BYTE); + } +} + +/*****************************************************//** +Free data in memory. It must be called ONCE when database shutdown. */ +void +free_tlog() +/*==================*/ +{ + flush_tlog(); + for (auto& tlog_file_cache : tlog_file_caches) { + delete(tlog_file_cache); + } +} + +/*****************************************************//** +Record transaction when starting and write redo log. */ +void +record_start_tlog( +/*==================*/ + trx_id_t id, /*!< in: transaction id */ + long sec, /*!< in: start time sec */ + long usec) /*!< in: start time usec */ +{ + if (id == 0) return; + + tlog_trx_status status = TLOG_TRX_STATUS_IN_PROGRESS; + auto usec_val = time2usec(sec, usec); + + record_tlog_low(id, status, usec_val); +} + +/*****************************************************//** +Get tlog of transaction with transaction id. +@param[in] transaction id +@param[in] trx_sys->max_trx_id +@return tlog to transaction */ +tlog_t +read_tlog_by_trx_id(const trx_id_t& id, const trx_id_t& max_trx_id) +/*==================*/ +{ + tlog_t tlog{TLOG_TRX_STATUS_NOT_STARTED, timeval{0, 0}, timeval{0, 0}}; + + /* read-only transaction, return default. */ + if (id == 0) return tlog; + + /* transaction has not started yet, return default. 
*/ + if (id >= max_trx_id) + return tlog; + + /* read tlog by trx_id */ + tlog_file_caches[(id / TLOG_NUM_PER_PAGE) % LRU_CACHE_NUM]->read_struct_by_id(id, [&tlog](char* tlog_p) + { + auto get_timeval = [](edge_t usec) -> timeval + { + const long int unit = SEC_USEC_UNIT; + return timeval{static_cast<__time_t>(usec / unit), + static_cast<__suseconds_t>(usec % unit)}; + }; + tlog.status = get_from_page(tlog_p, ::status_tlog); + tlog.start_time = get_timeval( + get_from_page(tlog_p, ::start_time_tlog)); + tlog.finish_time = get_timeval( + get_from_page(tlog_p, ::finish_time_tlog)); + }); + + return tlog; +} + +std::vector +read_tlog_by_trx_ids(const trx_id_t& left_id, const trx_id_t& right_id) +{ + trx_id_t max_trx_id = trx_sys_get_max_trx_id(); + + std::vector res; + for (auto id = left_id; id <= right_id; ++id) { + res.push_back( + tlog_info(id, read_tlog_by_trx_id(id, max_trx_id))); + } + return res; +} + +std::vector +read_tlog_finish_between( + timeval& left_time, + timeval& right_time, + trx_id_t max_trx_id) +{ + std::function less_than = + [](timeval& t1, timeval& t2) + { + return t1.tv_sec < t2.tv_sec || + (t1.tv_sec == t2.tv_sec && t1.tv_usec < t2.tv_usec); + }; + + std::vector res; + + for (trx_id_t id = 1; id != max_trx_id + 1; ++id) + { + tlog_t tlog = read_tlog_by_trx_id(id, max_trx_id); + if (!less_than(tlog.finish_time, left_time) && + less_than(tlog.finish_time, right_time)) { + res.push_back(tlog_info(id, tlog)); + } + } + + return res; +} + +/***********************************************************//** +Get active transaction list with time point. 
+@return the lowest unassigned transaction id at the point time */ +trx_id_t +get_trx_id_list_by_time_point( +/*=======================*/ + std::vector& trx_ids, /*!< in/out: active transaction + id list */ + trx_id_t max_trx_id, /*!< in: current lowest + unassigned transaction id */ + const long& time_sec) /*!< in: time point, sec */ +{ + if (!trx_ids.empty()) trx_ids.clear(); + + std::function less_than_timeval = + [time_sec](timeval& time) -> bool + { + return time_sec < time.tv_sec || + (time_sec == time.tv_sec && 0 < time.tv_usec); + }; + + trx_id_t id = 0; + for (; id != max_trx_id + 1; ++id) + { + tlog_t tlog = read_tlog_by_trx_id(id, max_trx_id); + if (less_than_timeval(tlog.start_time)) { + /* Return the first unallocated id. Cause start_time + progressively increase by id, no more active trx_ids + will be pushed back. */ + return id; + } + if (tlog.status == TLOG_TRX_STATUS_IN_PROGRESS || + (tlog.status == TLOG_TRX_STATUS_COMMIT && + less_than_timeval(tlog.finish_time))) + { + /* Push back the active trx_id. */ + trx_ids.push_back(id); + } + } + + return max_trx_id + 1; +} + +/** Record transaction when finish and write redo log. 
*/ +void +record_finish_tlog( + trx_id_t id, /*!< in: transaction id */ + const char* op_info,/*!< in: transaction status(abort or commit) */ + mtr_t *mtr) /*!< in/out: mini transaction */ +{ + if (id == 0) return; + + tlog_trx_status status; + + if (!strcmp(op_info, "committing")) + status = TLOG_TRX_STATUS_COMMIT; + else if (!strcmp(op_info, "aborting")) + status = TLOG_TRX_STATUS_ABORT; + else + return; + + auto cur_usec = get_cur_usec(); + record_tlog_write_log(id, status, cur_usec, mtr); + record_tlog_low(id, status, cur_usec); +} + +/** +@return time in usec */ +constexpr +edge_t +time2usec(long sec, long usec) +{ + return sec * 1000000 + usec; +} + +/** +@return current time in usec */ +edge_t +get_cur_usec() +{ + struct timeval t; + gettimeofday(&t, NULL); + return time2usec(t.tv_sec, t.tv_usec); +} + +/** Flush data from memory to disk. */ +void +flush_tlog() +{ + for (auto& tlog_file_cache : tlog_file_caches) { + tlog_file_cache->flush_pages(); + } +} + +/** Record transaction when its status changes. */ +void +record_tlog_low( + const trx_id_t& id, /*!< in: transaction id */ + const tlog_trx_status& status, /*!< in: transaction status */ + const edge_t& usec) /*!< in: when transaction status changes */ +{ + tlog_file_caches[(id / TLOG_NUM_PER_PAGE) % LRU_CACHE_NUM]->operate_struct_by_id(id, [&status, &usec](char* tlog_p) -> bool + { + set_to_page(tlog_p, ::status_tlog, status); + if (status == TLOG_TRX_STATUS_IN_PROGRESS) + set_to_page(tlog_p, ::start_time_tlog, usec); + else + { + set_to_page(tlog_p, ::finish_time_tlog, usec); + } + + /* Set page dirty. */ + return true; + }); +} + +/** Write the redo log for recording tlog. 
*/ +void +record_tlog_write_log( + const trx_id_t& id, /*!< in: transaction id */ + const tlog_trx_status& status, /*!< in: transaction status */ + const edge_t& usec, /*!< in: when transaction status changes */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + /* Allocate memory for mlog type, transaction id, + transaction status and usec; a compressed 64-bit value takes up to 9 bytes. */ + byte* log_ptr = mlog_open(mtr, + 1 + 9 + 1 + 9); + if (log_ptr == NULL) return; /* no log buffer was opened for this mtr, nothing to write */ + /* Write type of the redo log record */ + mach_write_to_1(log_ptr, MLOG_TLOG_RECORD); + ++log_ptr; + + /* Write transaction id */ + log_ptr += mach_u64_write_compressed(log_ptr, id); + + /* Write transaction status */ + mach_write_to_1(log_ptr, status); + ++log_ptr; + + /* Write the time when transaction status changes */ + log_ptr += mach_u64_write_compressed(log_ptr, usec); + + mtr->added_rec(); + + mlog_close(mtr, log_ptr); +} + diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/trx/trx0purge.cc mysql-8.0.3-rc/storage/innobase/trx/trx0purge.cc --- mysql-8.0.3-rc/storage/innobase/trx/trx0purge.cc 2019-12-05 18:50:16.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/trx/trx0purge.cc 2020-05-03 16:33:58.250541247 +0800 @@ -53,6 +53,10 @@ Created 3/26/1996 Heikki Tuuri #include "clone0api.h" #include "mysqld.h" +#include "current_thd.h" +#include "dict0dd.h" +#include + /** Maximum allowable purge history length. <=0 means 'infinite'.
*/ ulong srv_max_purge_lag = 0; @@ -1525,9 +1529,46 @@ trx_purge_read_undo_rec( mtr_commit(&mtr); } else { - offset = 0; - undo_no = 0; - undo_rseg_space = SPACE_UNKNOWN; + /* Flashback: the in-place update should also be attached */ + mtr_t mtr; + trx_undo_rec_t* undo_rec = NULL; + + mtr_start(&mtr); + + /* Cannot call dd_table_open_on_id() before server is fully up */ + if (!mysqld_server_started) { + offset = 0; + undo_no = 0; + undo_rseg_space = SPACE_UNKNOWN; + + purge_sys->offset = offset; + purge_sys->page_no = page_no; + purge_sys->iter.undo_no = undo_no; + purge_sys->iter.undo_rseg_space = undo_rseg_space; + + purge_sys->next_stored = TRUE; + + return; + } + + undo_rec = trx_undo_get_first_rec( + purge_sys->rseg->space_id, + page_size, + purge_sys->hdr_page_no, + purge_sys->hdr_offset, RW_S_LATCH, &mtr); + + if (undo_rec != NULL) { + offset = page_offset(undo_rec); + undo_no = trx_undo_rec_get_undo_no(undo_rec); + undo_rseg_space = purge_sys->rseg->space_id; + page_no = page_get_page_no(page_align(undo_rec)); + } else { + offset = 0; + undo_no = 0; + undo_rseg_space = SPACE_UNKNOWN; + } + + mtr_commit(&mtr); } purge_sys->offset = offset; @@ -1617,7 +1658,6 @@ trx_purge_get_next_rec( for (;;) { ulint type; trx_undo_rec_t* next_rec; - ulint cmpl_info; /* Try first to find the next record which requires a purge operation from the same page of the same undo log */ @@ -1641,15 +1681,21 @@ trx_purge_get_next_rec( break; } - cmpl_info = trx_undo_rec_get_cmpl_info(rec2); - if (trx_undo_rec_get_extern_storage(rec2)) { break; } - if ((type == TRX_UNDO_UPD_EXIST_REC) - && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - + /* In original mysql, the undo record + which meets (cmpl_indo & UPD_NODE_NO_ORD_CHANGE) + is not returned and not purged. + We need it to be purged and then to be restored + into history table. 
+ Even thougn all the UPD_NODE_NO_ORD_CHANGE records + are returned here, only the records of original table + are to be actually purged and restored, records of + normal table are not purged in fact. + See row_purge_parse_undo_rec */ + if (type == TRX_UNDO_UPD_EXIST_REC) { break; } } @@ -1988,6 +2034,8 @@ trx_purge( { que_thr_t* thr = NULL; ulint n_pages_handled; + MDL_ticket* mdl = NULL; + THD* thd = current_thd; ut_a(n_purge_threads > 0); @@ -1996,6 +2044,8 @@ trx_purge( /* The number of tasks submitted should be completed. */ ut_a(purge_sys->n_submitted == purge_sys->n_completed); + thd_acquire_flashback_lock(thd, &mdl); + rw_lock_x_lock(&purge_sys->latch); purge_sys->view_active = false; @@ -2008,6 +2058,7 @@ trx_purge( #ifdef UNIV_DEBUG if (srv_purge_view_update_only_debug) { + thd_release_flashback_lock(thd, &mdl); return(0); } #endif /* UNIV_DEBUG */ @@ -2077,6 +2128,8 @@ run_synchronously: MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1); MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled); + thd_release_flashback_lock(thd, &mdl); + return(n_pages_handled); } diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/trx/trx0rec.cc mysql-8.0.3-rc/storage/innobase/trx/trx0rec.cc --- mysql-8.0.3-rc/storage/innobase/trx/trx0rec.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/trx/trx0rec.cc 2020-05-03 16:35:01.894538799 +0800 @@ -1018,6 +1018,11 @@ trx_undo_page_report_modify( ptr += mach_u64_write_compressed(ptr, trx_read_roll_ptr(field)); + /* Write current trx_id as DB_END_TRX_ID. + Read this and write to field DB_END_TRX_ID, + when making up and dumping old version. 
*/ + ptr += mach_u64_write_compressed(ptr, trx->id); + /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record which will be modified in the clustered index */ @@ -1057,7 +1062,15 @@ trx_undo_page_report_modify( return(0); } - ulint n_updated = upd_get_n_fields(update); + ulint n_updated; + + if (index->table->flags2 & DICT_TF2_ORIG_TABLE) { + + n_updated = dict_index_get_n_fields(index) - DATA_N_SYS_COLS; + } else { + + n_updated = upd_get_n_fields(update); + } /* If this is an online update while an inplace alter table is in progress and the table has virtual column, we will @@ -1083,6 +1096,19 @@ trx_undo_page_report_modify( ptr += mach_write_compressed(ptr, n_updated); + /* Store all fields into undo record (flashback) */ + if (index->table->flags2 & DICT_TF2_ORIG_TABLE) { + for (uint j = DATA_N_SYS_COLS; j < dict_index_get_n_fields(index); j++) { + ulint pos = j; + ptr += mach_write_compressed(ptr, pos); + field = rec_get_nth_field(rec, offsets, pos, &flen); + ptr += mach_write_compressed(ptr, flen); + ut_memcpy(ptr, field, flen); + ptr += flen; + } + + } else { + /* Sotre fields have been modified into undo record */ for (i = 0; i < upd_get_n_fields(update); i++) { upd_field_t* fld = upd_get_nth_field(update, i); @@ -1216,6 +1242,8 @@ trx_undo_page_report_modify( } } } + + } // flashback } /* Reset the first_v_col, so to put the virtual column undo @@ -1463,6 +1491,7 @@ trx_undo_update_rec_get_sys_cols( general parameters */ trx_id_t* trx_id, /*!< out: trx id */ roll_ptr_t* roll_ptr, /*!< out: roll ptr */ + trx_id_t* end_trx_id, /*!< out: end trx id */ ulint* info_bits) /*!< out: info bits state */ { /* Read the state of the info bits */ @@ -1473,6 +1502,7 @@ trx_undo_update_rec_get_sys_cols( *trx_id = mach_u64_read_next_compressed(&ptr); *roll_ptr = mach_u64_read_next_compressed(&ptr); + *end_trx_id = mach_u64_read_next_compressed(&ptr); return(const_cast(ptr)); } @@ -1498,6 +1528,7 @@ 
trx_undo_update_rec_get_update( the update vector */ trx_id_t trx_id, /*!< in: transaction id from this undo record */ roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */ + trx_id_t end_trx_id,/*!< in: transaction id that create this undo record (flashback) */ ulint info_bits,/*!< in: info bits from this undo record */ trx_t* trx, /*!< in: transaction */ mem_heap_t* heap, /*!< in: memory heap from which the memory @@ -1521,7 +1552,7 @@ trx_undo_update_rec_get_update( n_fields = 0; } - update = upd_create(n_fields + 2, heap); + update = upd_create(n_fields + 3, heap); update->info_bits = info_bits; @@ -1549,6 +1580,19 @@ trx_undo_update_rec_get_update( index, trx); dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN); + /* Flashback: store end trx id to update vector additionally */ + + upd_field = upd_get_nth_field(update, n_fields + 2); + + buf = static_cast(mem_heap_alloc(heap, DATA_TRX_ID_LEN)); + + trx_write_trx_id(buf, end_trx_id); + + upd_field_set_field_no(upd_field, + index->get_sys_col_pos(DATA_END_TRX_ID), + index, trx); + dfield_set_data(&(upd_field->new_val), buf, DATA_END_TRX_ID_LEN); + /* Store then the updated ordinary columns to the update vector */ for (i = 0; i < n_fields; i++) { @@ -2279,6 +2323,7 @@ trx_undo_prev_version_build( table_id_t table_id; trx_id_t trx_id; roll_ptr_t roll_ptr; + trx_id_t end_trx_id; upd_t* update; byte* ptr; ulint info_bits; @@ -2317,6 +2362,7 @@ trx_undo_prev_version_build( /* We are fetching the record being purged */ undo_rec = trx_undo_get_undo_rec_low( roll_ptr, heap, is_temp); + } else { /* The undo record may already have been purged, during purge or semi-consistent read. 
*/ @@ -2335,7 +2381,7 @@ trx_undo_prev_version_build( } ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); + &end_trx_id, &info_bits); /* (a) If a clustered index record version is such that the trx id stamp in it is bigger than purge_sys->view, then the @@ -2362,7 +2408,7 @@ trx_undo_prev_version_build( ptr = trx_undo_rec_skip_row_ref(ptr, index); ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id, - roll_ptr, info_bits, + roll_ptr, end_trx_id, info_bits, NULL, heap, &update); ut_a(ptr); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/trx/trx0roll.cc mysql-8.0.3-rc/storage/innobase/trx/trx0roll.cc --- mysql-8.0.3-rc/storage/innobase/trx/trx0roll.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/trx/trx0roll.cc 2020-04-19 23:52:38.834543864 +0800 @@ -50,6 +50,7 @@ Created 3/26/1996 Heikki Tuuri #include "os0thread-create.h" #include #include "dict0dd.h" +#include "tlog0tlog.h" /** This many pages must be undone before a truncate is tried within rollback */ diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/trx/trx0sys.cc mysql-8.0.3-rc/storage/innobase/trx/trx0sys.cc --- mysql-8.0.3-rc/storage/innobase/trx/trx0sys.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/trx/trx0sys.cc 2020-04-19 23:52:38.834543864 +0800 @@ -664,7 +664,8 @@ trx_sys_validate_trx_list_low( prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) { check_trx_state(trx); - ut_a(prev_trx == NULL || prev_trx->id > trx->id); + ut_a(prev_trx == NULL || prev_trx->purge_restore_trx || + prev_trx->id > trx->id); } return(true); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/innobase/trx/trx0trx.cc mysql-8.0.3-rc/storage/innobase/trx/trx0trx.cc --- mysql-8.0.3-rc/storage/innobase/trx/trx0trx.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/innobase/trx/trx0trx.cc 
2020-04-26 09:00:49.098925536 +0800 @@ -29,6 +29,7 @@ Created 3/26/1996 Heikki Tuuri #include #include +#include #include "btr0sea.h" #include "dict0dd.h" @@ -486,6 +487,8 @@ trx_create_low() trx_free(). */ ut_a(trx->mod_tables.size() == 0); + trx->purge_restore_trx = false; + return(trx); } @@ -1304,7 +1307,7 @@ trx_assign_rseg_temp( if (trx->id == 0) { mutex_enter(&trx_sys->mutex); - trx->id = trx_sys_get_new_trx_id(); + trx_sys_get_new_trx_id(trx); trx_sys->rw_trx_ids.push_back(trx->id); @@ -1396,6 +1399,12 @@ trx_start_low( read only can write to temporary tables, we put those on the RO list too. */ + /* Flashback: set trx start time */ + struct timeval time; + gettimeofday(&time, NULL); + trx->trx_start_sec = time.tv_sec; + trx->trx_start_usec = time.tv_usec; + if (!trx->read_only && (trx->mysql_thd == 0 || read_write || trx->ddl_operation)) { @@ -1406,7 +1415,7 @@ trx_start_low( trx_sys_mutex_enter(); - trx->id = trx_sys_get_new_trx_id(); + trx_sys_get_new_trx_id(trx); trx_sys->rw_trx_ids.push_back(trx->id); @@ -1446,7 +1455,7 @@ trx_start_low( ut_ad(!srv_read_only_mode); - trx->id = trx_sys_get_new_trx_id(); + trx_sys_get_new_trx_id(trx); trx_sys->rw_trx_ids.push_back(trx->id); @@ -1969,7 +1978,7 @@ trx_commit_in_memory( } else { - if (trx->id > 0) { + if (trx->id > 0 && !trx->purge_restore_trx) { /* For consistent snapshot, we need to remove current transaction from running transaction id list for mvcc before doing commit and releasing locks.
*/ @@ -1998,6 +2007,10 @@ trx_commit_in_memory( } } + // flashback + if (trx->purge_restore_trx) + trx->purge_restore_trx = false; + if (trx->rsegs.m_redo.rseg != NULL) { trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; ut_ad(rseg->trx_ref_count > 0); @@ -2154,6 +2167,10 @@ trx_commit_low( serialised = trx_write_serialisation_history(trx, mtr); + /* Flashback */ + if (!trx->purge_restore_trx) + record_finish_tlog(trx->id, trx->op_info, mtr); + /* The following call commits the mini-transaction, making the whole transaction committed in the file-based world, at this log sequence number. The transaction becomes 'durable' when @@ -3186,7 +3203,12 @@ trx_set_rw_mode( mutex_enter(&trx_sys->mutex); ut_ad(trx->id == 0); - trx->id = trx_sys_get_new_trx_id(); + trx_sys_get_new_trx_id(trx); + + if (trx->purge_restore_trx) { + mutex_exit(&trx_sys->mutex); + return; + } trx_sys->rw_trx_ids.push_back(trx->id); diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/perfschema/pfs_column_types.cc mysql-8.0.3-rc/storage/perfschema/pfs_column_types.cc --- mysql-8.0.3-rc/storage/perfschema/pfs_column_types.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/perfschema/pfs_column_types.cc 2020-04-23 18:26:53.225798987 +0800 @@ -46,6 +46,7 @@ static s_object_type_map object_type_map {OBJECT_TYPE_LOCKING_SERVICE, {C_STRING_WITH_LEN("ACL CACHE")}}, {OBJECT_TYPE_BACKUP_LOCK, {C_STRING_WITH_LEN("BACKUP LOCK")}}, {OBJECT_TYPE_RESOURCE_GROUPS, {C_STRING_WITH_LEN("RESOURCE_GROUPS")}}, + {OBJECT_TYPE_FLASHBACK, {C_STRING_WITH_LEN("FLASHBACK")}}, {NO_OBJECT_TYPE, {C_STRING_WITH_LEN("")}}}; void diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/perfschema/pfs_column_types.h mysql-8.0.3-rc/storage/perfschema/pfs_column_types.h --- mysql-8.0.3-rc/storage/perfschema/pfs_column_types.h 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/perfschema/pfs_column_types.h 2020-04-23 18:30:10.409791404 +0800 @@ -217,12 
+217,13 @@ enum enum_object_type OBJECT_TYPE_LOCKING_SERVICE = 12, OBJECT_TYPE_ACL_CACHE = 13, OBJECT_TYPE_BACKUP_LOCK = 14, - OBJECT_TYPE_RESOURCE_GROUPS = 15 + OBJECT_TYPE_RESOURCE_GROUPS = 15, + OBJECT_TYPE_FLASHBACK = 16 }; /** Integer, first value of @sa enum_object_type. */ #define FIRST_OBJECT_TYPE (static_cast(OBJECT_TYPE_EVENT)) /** Integer, last value of @sa enum_object_type. */ -#define LAST_OBJECT_TYPE (static_cast(OBJECT_TYPE_RESOURCE_GROUPS)) +#define LAST_OBJECT_TYPE (static_cast(OBJECT_TYPE_FLASHBACK)) /** Integer, number of values of @sa enum_object_type. */ #define COUNT_OBJECT_TYPE (LAST_OBJECT_TYPE - FIRST_OBJECT_TYPE + 1) diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/perfschema/table_events_waits.cc mysql-8.0.3-rc/storage/perfschema/table_events_waits.cc --- mysql-8.0.3-rc/storage/perfschema/table_events_waits.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/perfschema/table_events_waits.cc 2020-04-23 18:28:34.541795091 +0800 @@ -509,6 +509,12 @@ table_events_waits_common::make_metadata m_row.m_object_schema_length= mdl->db_name_length(); m_row.m_object_name_length= mdl->name_length(); break; + case MDL_key::FLASHBACK: + m_row.m_object_type = "FLASHBACK"; + m_row.m_object_type_length = 9; + m_row.m_object_schema_length = 0; + m_row.m_object_name_length = 0; + break; /* without this, control falls into the default case below, which resets m_object_type_length to 0 */ case MDL_key::NAMESPACE_END: default: m_row.m_object_type_length = 0; diff -uprN '--exclude-from=exclude.txt' '--exclude=mysql-test' mysql-8.0.3-rc/storage/perfschema/table_helper.cc mysql-8.0.3-rc/storage/perfschema/table_helper.cc --- mysql-8.0.3-rc/storage/perfschema/table_helper.cc 2017-09-19 19:33:50.000000000 +0800 +++ mysql-8.0.3-rc/storage/perfschema/table_helper.cc 2020-04-23 18:24:00.441805632 +0800 @@ -847,6 +847,11 @@ PFS_object_row::make_row(const MDL_key * m_schema_name_length= mdl->db_name_length(); m_object_name_length= mdl->name_length(); break; + case MDL_key::FLASHBACK: + m_object_type = OBJECT_TYPE_FLASHBACK; +
m_schema_name_length = 0; + m_object_name_length = 0; + break; /* without this, control falls into the default case below, which overwrites m_object_type with NO_OBJECT_TYPE */ case MDL_key::NAMESPACE_END: default: m_object_type = NO_OBJECT_TYPE;