diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 342714a..32885c6 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -287,7 +287,13 @@ enum fil_operation_t {
 fil_addr_t fil_addr_null = {FIL_NULL, 0};

 /** Maximum number of threads to use for scanning data files. */
-static const size_t MAX_SCAN_THREADS = 8;
+static const size_t MAX_SCAN_THREADS = 4;
+
+/** Maximum number of pages to read to determine the space ID. */
+static const size_t MAX_PAGES_TO_READ = 1;
+
+/** Maximum files to scan for space ID per thread. */
+static const size_t MAX_FILES_PER_THREAD = 8000;

 #ifndef UNIV_HOTBACKUP
 /** Maximum number of shards supported. */
@@ -10407,10 +10413,9 @@ Datafile::find_space_id().
 @param[in] filename File name to check
 @return s_invalid_space_id if not found, otherwise the space ID */
 space_id_t Fil_system::get_tablespace_id(const std::string &filename) {
-  char buf[sizeof(space_id_t)];
-  std::ifstream ifs(filename, std::ios::binary);
+  FILE *fp = fopen(filename.c_str(), "rb");

-  if (!ifs) {
+  if (fp == nullptr) {
     ib::warn(ER_IB_MSG_372) << "Unable to open '" << filename << "'";
     return (dict_sys_t::s_invalid_space_id);
   }
@@ -10420,61 +10425,51 @@ space_id_t Fil_system::get_tablespace_id(const std::string &filename) {
   space_ids.reserve(MAX_PAGES_TO_CHECK);

-  for (page_no_t page_no = 0; page_no < MAX_PAGES_TO_CHECK; ++page_no) {
-    off_t off;
+  const auto n_bytes = page_size * MAX_PAGES_TO_CHECK;

-    off = page_no * page_size + FIL_PAGE_SPACE_ID;
+  std::unique_ptr<byte[]> buf(new byte[n_bytes]);

-    if (off == FIL_PAGE_SPACE_ID) {
-      /* Figure out the page size of the tablespace. If it's
-      a compressed tablespace. */
-      ifs.seekg(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS, ifs.beg);
-
-      if ((ifs.rdstate() & std::ifstream::eofbit) != 0 ||
-          (ifs.rdstate() & std::ifstream::failbit) != 0 ||
-          (ifs.rdstate() & std::ifstream::badbit) != 0) {
-        return (dict_sys_t::s_invalid_space_id);
-      }
+  if (!buf) {
+    return dict_sys_t::s_invalid_space_id;
+  }

-      ifs.read(buf, sizeof(buf));
+  auto bytes_read = fread(buf.get(), page_size, MAX_PAGES_TO_READ, fp);

-      if (!ifs.good() || (size_t)ifs.gcount() < sizeof(buf)) {
-        return (dict_sys_t::s_invalid_space_id);
-      }
+#ifdef POSIX_FADV_DONTNEED
+  posix_fadvise(fileno(fp), 0, bytes_read, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */

-      uint32_t flags;
+  if (bytes_read * page_size < 1024) {
+    fclose(fp);
+    return dict_sys_t::s_invalid_space_id;
+  }

-      flags = mach_read_from_4(reinterpret_cast<byte *>(buf));
+  for (page_no_t i = 0; i < MAX_PAGES_TO_READ; ++i) {
+    const auto off = i * page_size + FIL_PAGE_SPACE_ID;

-      const page_size_t space_page_size(flags);
+    if (off == FIL_PAGE_SPACE_ID) {
+      /* Figure out the page size of the tablespace from the first page.
+      If it's a compressed tablespace. */

-      page_size = space_page_size.physical();
-    }
+      auto space_flags_offset = FSP_HEADER_OFFSET + FSP_SPACE_FLAGS;

-    ifs.seekg(off, ifs.beg);
+      ut_a(space_flags_offset + 4 < n_bytes);

-    if ((ifs.rdstate() & std::ifstream::eofbit) != 0 ||
-        (ifs.rdstate() & std::ifstream::failbit) != 0 ||
-        (ifs.rdstate() & std::ifstream::badbit) != 0) {
-      /* Trucated files can be a single page */
-      break;
-    }
+      const auto flags = mach_read_from_4(buf.get() + space_flags_offset);

-    ifs.read(buf, sizeof(buf));
+      page_size_t space_page_size(flags);

-    if (!ifs.good() || (size_t)ifs.gcount() < sizeof(buf)) {
-      /* Trucated files can be a single page */
-      break;
+      page_size = space_page_size.physical();
     }

-    space_id_t space_id;
-
-    space_id = mach_read_from_4(reinterpret_cast<byte *>(buf));
+    space_ids.push_back(mach_read_from_4(buf.get() + off));

-    space_ids.push_back(space_id);
+    if ((i + 1) * page_size >= bytes_read) {
+      break;
+    }
   }

-  ifs.close();
+  fclose(fp);

   space_id_t space_id;
@@ -10484,7 +10479,6 @@ space_id_t Fil_system::get_tablespace_id(const std::string &filename) {
   for (auto id : space_ids) {
     if (id == 0 || space_id != id) {
       space_id = UINT32_UNDEFINED;
-      break;
     }
   }

@@ -10710,17 +10704,22 @@ dberr_t Tablespace_dirs::scan(const std::string &in_directories) {
   Space_id_set unique;
   Space_id_set duplicates;

-  size_t n_threads = (ibd_files.size() / 50000);
+  size_t n_threads = ibd_files.size() / MAX_FILES_PER_THREAD;
+
+  if (ibd_files.size() % MAX_FILES_PER_THREAD) {
+    ++n_threads;
+  }

   if (n_threads > 0) {
     if (n_threads > MAX_SCAN_THREADS) {
       n_threads = MAX_SCAN_THREADS;
     }

-    ib::info(ER_IB_MSG_382) << "Using " << (n_threads + 1) << " threads to"
+    ib::info(ER_IB_MSG_382) << "Using " << n_threads << " threads to"
                             << " scan the tablespace files";
   }

+  std::mutex m;

   using std::placeholders::_1;
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 4c06eee..3524076 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -3073,7 +3073,7 @@ class Validate_files {
   /* This is the maximum number of tablespaces that can be handled by
   a single thread. If more than that, the tablespaces will be divided
   up between up to 8 threads. */
-  const size_t MAX_TABLESPACES_PER_THREAD = 50000;
+  const size_t MAX_TABLESPACES_PER_THREAD = 8000;

   /* If therea are more than MAX_TABLESPACES_PER_THREAD scanned, then
   the work will be distributed among up to this many parallel threads. */
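To make the new fil0fil.cc logic easier to follow outside the diff, here is a minimal standalone sketch of what Fil_system::get_tablespace_id() now does: open the .ibd file, read the first page with a single fread(), and pull the space ID straight out of the page header instead of seeking and re-reading field by field. This is only an illustration, not the InnoDB implementation; probe_space_id and read_be32 are made-up names, and the hard-coded values (offset 34 for FIL_PAGE_SPACE_ID, a 16 KiB default page size, the 1024-byte sanity check) are assumptions about the on-disk layout rather than anything taken from the patch.

#include <cstdint>
#include <cstdio>
#include <memory>
#include <string>

// InnoDB stores these header fields big-endian; this mirrors what
// mach_read_from_4() does in the server.
static uint32_t read_be32(const unsigned char *p) {
  return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
         (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}

// Read the first page of an .ibd file and return its space ID,
// or UINT32_MAX if the file cannot be opened or is too short.
uint32_t probe_space_id(const std::string &filename, size_t page_size = 16384) {
  FILE *fp = std::fopen(filename.c_str(), "rb");
  if (fp == nullptr) {
    return UINT32_MAX;
  }

  std::unique_ptr<unsigned char[]> buf(new unsigned char[page_size]);
  const size_t n = std::fread(buf.get(), 1, page_size, fp);
  std::fclose(fp);

  // A file truncated inside the page header tells us nothing useful.
  if (n < 1024) {
    return UINT32_MAX;
  }

  // The space ID sits at a fixed offset in the FIL header of every page.
  return read_be32(buf.get() + 34 /* FIL_PAGE_SPACE_ID */);
}

Even in this toy form the point of the change is visible: one buffered read per file replaces several seek-and-read round trips, which is what makes scanning a large number of tablespace files with a small pool of threads practical.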