commit 26900e9b5ef16c86b64b1f0473f293f0fb4fcd8b Author: GAO Xiaoxin Date: Sat Sep 24 18:01:01 2022 +0800 Issue #108604 Suggest to use zstd for innodb page compression Currently InnoDB page compression only supports zlib and lz4, while the zstd algorithm may provide a similar (or slightly better) compression ratio and much better speed for both compression and decompression. MySQL 8.0 already uses zstd for binlog compression, so it should be safe to use it for page compression as well. zstd-1.5.0 is already bundled in the MySQL source tree under "extra/zstd", so it is easy to enable zstd for page compression. diff --git a/storage/innobase/clone/clone0desc.cc b/storage/innobase/clone/clone0desc.cc index 4798716..15c8280 100644 --- a/storage/innobase/clone/clone0desc.cc +++ b/storage/innobase/clone/clone0desc.cc @@ -631,6 +631,7 @@ static const uint CLONE_DESC_FILE_FLAG_RENAMED = 4; static const uint CLONE_DESC_FILE_FLAG_DELETED = 5; /** Clone File Flag: File metadata has encryption key. 
*/ static const uint CLONE_DESC_FILE_HAS_KEY = 6; +static const uint CLONE_DESC_FILE_FLAG_ZSTD = 7; /*!< Clone File Flag: Compression type ZSTD */ /** File Metadata: Tablespace ID in 4 bytes */ static const uint CLONE_FILE_SPACE_ID_OFFSET = CLONE_FILE_FLAGS_OFFSET + 2; @@ -697,6 +698,8 @@ void Clone_Desc_File_MetaData::serialize(byte *&desc_file, uint &len, DESC_SET_FLAG(file_flags, CLONE_DESC_FILE_FLAG_ZLIB); } else if (m_file_meta.m_compress_type == Compression::LZ4) { DESC_SET_FLAG(file_flags, CLONE_DESC_FILE_FLAG_LZ4); + } else if (m_file_meta.m_compress_type == Compression::ZSTD) { + DESC_SET_FLAG(file_flags, CLONE_DESC_FILE_FLAG_ZSTD); } /* Set file encryption type */ if (m_file_meta.m_encrypt_type == Encryption::AES) { @@ -785,6 +788,8 @@ bool Clone_Desc_File_MetaData::deserialize(const byte *desc_file, m_file_meta.m_compress_type = Compression::ZLIB; } else if (DESC_CHECK_FLAG(file_flags, CLONE_DESC_FILE_FLAG_LZ4)) { m_file_meta.m_compress_type = Compression::LZ4; + } else if (DESC_CHECK_FLAG(file_flags, CLONE_DESC_FILE_FLAG_ZSTD)) { + m_file_meta.m_compress_type = Compression::ZSTD; } /* Get file encryption information */ diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index efc2e5b..0b93ad9 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -9318,6 +9318,7 @@ dberr_t fil_set_compression(space_id_t space_id, const char *algorithm) { switch (srv_debug_compress) { case Compression::LZ4: case Compression::ZLIB: + case Compression::ZSTD: case Compression::NONE: compression.m_type = static_cast(srv_debug_compress); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9d8bb6f..edb0c98 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -2458,6 +2458,9 @@ dberr_t Compression::check(const char *algorithm, Compression *compression) { } else if (innobase_strcasecmp(algorithm, "lz4") == 0) { compression->m_type = LZ4; + } else if 
(innobase_strcasecmp(algorithm, "zstd") == 0) { + compression->m_type = ZSTD; + } else { return (DB_UNSUPPORTED); } @@ -2481,6 +2484,7 @@ bool Compression::validate(const Compression::Type type) { case NONE: case ZLIB: case LZ4: + case ZSTD: break; default: ret = false; @@ -21909,6 +21913,12 @@ static MYSQL_SYSVAR_UINT( ", 1 is fastest, 9 is best compression and default is 6.", nullptr, nullptr, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); +static MYSQL_SYSVAR_UINT( + page_compression_level_zstd, page_zstd_level, PLUGIN_VAR_RQCMDARG, + "Compression level used for page compression using zstd. A higher value gives a higher compression ratio" + "; 1 is fastest, 22 is best compression and default is 3.", + nullptr, nullptr, DEFAULT_COMPRESSION_LEVEL_ZSTD, 1, 22, 0); + static MYSQL_SYSVAR_BOOL( log_compressed_pages, page_zip_log_pages, PLUGIN_VAR_OPCMDARG, "Enables/disables the logging of entire compressed page images." @@ -22877,6 +22887,7 @@ static SYS_VAR *innobase_system_variables[] = { MYSQL_SYSVAR(commit_concurrency), MYSQL_SYSVAR(concurrency_tickets), MYSQL_SYSVAR(compression_level), + MYSQL_SYSVAR(page_compression_level_zstd), MYSQL_SYSVAR(ddl_buffer_size), MYSQL_SYSVAR(ddl_threads), MYSQL_SYSVAR(data_file_path), diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 7a91ff5..abb8a1f 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -82,6 +82,12 @@ extern unsigned long long os_fsync_threshold; /** File offset in bytes */ typedef uint64_t os_offset_t; +/** Default zstd compression level for page compression. */ +constexpr uint32_t DEFAULT_COMPRESSION_LEVEL_ZSTD = 3; + +/** zstd compression level used when compressing pages; bound to the page_compression_level_zstd system variable. */ +extern uint page_zstd_level; + namespace file { /** Blocks for doing IO, used in the transparent compression and encryption code. */ diff --git a/storage/innobase/os/file.cc b/storage/innobase/os/file.cc index 776038a..5058f4b 100644 --- a/storage/innobase/os/file.cc +++ b/storage/innobase/os/file.cc @@ -50,6 +50,7 @@ external tools. */ #include #include +#include <zstd.h> /** Convert to a "string". 
@param[in] type The compression type @@ -62,6 +63,8 @@ const char *Compression::to_string(Type type) { return ("Zlib"); case LZ4: return ("LZ4"); + case ZSTD: + return ("ZSTD"); } ut_d(ut_error); @@ -231,7 +234,18 @@ dberr_t Compression::deserialize(bool dblwr_read, byte *src, byte *dst, } break; - + case Compression::ZSTD: { + size_t csize = ZSTD_decompress(dst, header.m_original_size, ptr, header.m_compressed_size); + if (ZSTD_isError(csize)) { + if (allocated) { + ut::free(dst); + } + return (DB_IO_DECOMPRESS_FAIL); + } + ut_ad(static_cast(csize) <= len); + len = static_cast(csize); + break; + } default: #ifdef UNIV_NO_ERR_MSGS ib::error() diff --git a/storage/innobase/os/file.h b/storage/innobase/os/file.h index 5f9865d..97f3f35 100644 --- a/storage/innobase/os/file.h +++ b/storage/innobase/os/file.h @@ -61,7 +61,10 @@ struct Compression { ZLIB = 1, /** Use LZ4 faster variant, usually lower compression. */ - LZ4 = 2 + LZ4 = 2, + + /** Use ZSTD, usually a better compression ratio than zlib and faster. */ + ZSTD = 3 }; /** Compressed page meta-data */ @@ -93,6 +96,7 @@ struct Compression { case NONE: case ZLIB: case LZ4: + case ZSTD: break; default: ut_error; @@ -115,6 +119,9 @@ struct Compression { case LZ4: os << "LZ4"; break; + case ZSTD: + os << "ZSTD"; + break; default: os << ""; break; diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index e3e5642..2cf2632 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -90,6 +90,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include <zstd.h> #include #include #include @@ -103,6 +104,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA /* Flush after each os_fsync_threshold bytes */ unsigned long long os_fsync_threshold = 0; +/** zstd compression level used for page compression */ +uint page_zstd_level = DEFAULT_COMPRESSION_LEVEL_ZSTD; + /** Insert buffer segment id */ static const ulint IO_IBUF_SEGMENT = 0; @@ -1404,6 +1408,17 @@ byte 
*os_file_compress_page(Compression compression, ulint block_size, break; + case Compression::ZSTD: { + size_t csize = ZSTD_compress(dst + FIL_PAGE_DATA, static_cast(out_len), src + FIL_PAGE_DATA, static_cast(content_len), page_zstd_level); + if (ZSTD_isError(csize)) { + *dst_len = src_len; + return (src); + } + + len = static_cast(csize); + break; + } + default: *dst_len = src_len; return (src);