From 756b806db928eab52eb6d250a07c0be05b64e750 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikael=20Ronstr=C3=B6m?= Date: Mon, 8 May 2023 15:07:00 +0200 Subject: [PATCH] CONTRIB_371: Efficient zeroing of UNDO, REDO and tablespace files When initialising REDO log files at startup we want to ensure that the entire file size is preallocated and that the file is properly initialised. The same is true for UNDO log files in LGMAN and for tablespace files. To speed this up we use special XFS methods and POSIX methods that apply to the file systems we recommend to users. This means that we can preallocate file space without actually writing it, and we can even ensure that reads of this area return pages filled with zeroes. We can also use fallocate, which works at least for XFS and ext4 on Linux. We still need to write the initial parts of the file, and for the REDO log we have to write the start page of each MByte. Encrypted and compressed files have a file header that should not be zeroed. Also adds more debug info. 
--- config.h.cmake | 2 + configure.cmake | 2 + .../kernel/signaldata/FsReadWriteReq.hpp | 4 ++ storage/ndb/include/portlib/ndb_file.h | 4 ++ storage/ndb/include/util/ndbxfrm_file.h | 5 ++ .../ndb/src/common/portlib/ndb_file_posix.cpp | 35 +++++++++- storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp | 3 +- .../ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 28 ++++++-- storage/ndb/src/kernel/blocks/lgman.cpp | 44 ++++++++++-- storage/ndb/src/kernel/blocks/lgman.hpp | 4 +- .../ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp | 68 ++++++++++++++++--- storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp | 9 +-- storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp | 3 +- storage/ndb/src/kernel/blocks/tsman.cpp | 13 +++- storage/ndb/src/kernel/blocks/tsman.hpp | 4 +- 15 files changed, 194 insertions(+), 34 deletions(-) diff --git a/config.h.cmake b/config.h.cmake index b660e3049313..458ca1f87d0a 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -1,4 +1,5 @@ /* Copyright (c) 2009, 2023, Oracle and/or its affiliates. + Copyright (c) 2023, 2023, Hopsworks and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, @@ -73,6 +74,7 @@ #cmakedefine HAVE_CHOWN 1 #cmakedefine HAVE_CUSERID 1 #cmakedefine HAVE_DIRECTIO 1 +#cmakedefine HAVE_FALLOCATE 1 #cmakedefine HAVE_FTRUNCATE 1 #cmakedefine HAVE_FCHMOD 1 #cmakedefine HAVE_FCNTL 1 diff --git a/configure.cmake b/configure.cmake index c4d3dcca1ab4..e69ac82b06a7 100644 --- a/configure.cmake +++ b/configure.cmake @@ -1,4 +1,5 @@ # Copyright (c) 2009, 2023, Oracle and/or its affiliates. +# Copyright (c) 2023, 2023, Hopsworks and/or its affiliates. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License, version 2.0, @@ -231,6 +232,7 @@ CHECK_FUNCTION_EXISTS (index HAVE_INDEX) CHECK_FUNCTION_EXISTS (chown HAVE_CHOWN) CHECK_FUNCTION_EXISTS (cuserid HAVE_CUSERID) CHECK_FUNCTION_EXISTS (directio HAVE_DIRECTIO) +CHECK_FUNCTION_EXISTS (fallocate HAVE_FALLOCATE) CHECK_FUNCTION_EXISTS (ftruncate HAVE_FTRUNCATE) CHECK_FUNCTION_EXISTS (fchmod HAVE_FCHMOD) CHECK_FUNCTION_EXISTS (fcntl HAVE_FCNTL) diff --git a/storage/ndb/include/kernel/signaldata/FsReadWriteReq.hpp b/storage/ndb/include/kernel/signaldata/FsReadWriteReq.hpp index b125438f5815..058d17850775 100644 --- a/storage/ndb/include/kernel/signaldata/FsReadWriteReq.hpp +++ b/storage/ndb/include/kernel/signaldata/FsReadWriteReq.hpp @@ -1,5 +1,6 @@ /* Copyright (c) 2003, 2023, Oracle and/or its affiliates. + Copyright (c) 2023, 2023, Hopsworks and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, @@ -128,6 +129,9 @@ class FsReadWriteReq { struct { Uint32 pageNumber; } sharedPage; + struct { + Uint32 initZero; + } zeroPageIndicator; } data; static Uint8 getSyncFlag(const UintR & opFlag); diff --git a/storage/ndb/include/portlib/ndb_file.h b/storage/ndb/include/portlib/ndb_file.h index 200a68ca2f88..09900a88873c 100644 --- a/storage/ndb/include/portlib/ndb_file.h +++ b/storage/ndb/include/portlib/ndb_file.h @@ -178,6 +178,10 @@ class ndb_file * Reserve disk blocks for entire file. 
*/ int allocate() const; + /* + * Zero all disk pages allocated by allocate (avoid headers) + */ + int init_zero(ndb_off_t data_size, ndb_off_t offset) const; int set_block_size_and_alignment(size_t size, size_t alignment); bool have_direct_io_support() const; diff --git a/storage/ndb/include/util/ndbxfrm_file.h b/storage/ndb/include/util/ndbxfrm_file.h index 52aa3cde889a..49e4362851f7 100644 --- a/storage/ndb/include/util/ndbxfrm_file.h +++ b/storage/ndb/include/util/ndbxfrm_file.h @@ -343,6 +343,7 @@ class ndbxfrm_file int read_forward(ndbxfrm_output_iterator* out); int read_backward(ndbxfrm_output_reverse_iterator* out); ndb_off_t move_to_end(); + ndb_off_t get_payload_start() const; private: // file fixed properties @@ -480,4 +481,8 @@ inline bool ndbxfrm_file::is_definite_offset(ndb_off_t offset) return (offset != INDEFINITE_OFFSET); } +inline ndb_off_t ndbxfrm_file::get_payload_start() const +{ + return m_payload_start; +} #endif diff --git a/storage/ndb/src/common/portlib/ndb_file_posix.cpp b/storage/ndb/src/common/portlib/ndb_file_posix.cpp index becee194f72b..1d493b2957e7 100644 --- a/storage/ndb/src/common/portlib/ndb_file_posix.cpp +++ b/storage/ndb/src/common/portlib/ndb_file_posix.cpp @@ -225,7 +225,6 @@ int ndb_file::truncate(ndb_off_t end) const } return 0; } - int ndb_file::allocate() const { ndb_off_t size = get_size(); @@ -236,20 +235,23 @@ int ndb_file::allocate() const #ifdef HAVE_XFS_XFS_H if (::platform_test_xfs_fd(m_handle)) { - std::printf("Using xfsctl(XFS_IOC_RESVSP64) to allocate disk space"); + std::printf("Using xfsctl(XFS_IOC_RESVSP64) to allocate disk space" + ", size: %lu\n", size); xfs_flock64_t fl; fl.l_whence= 0; fl.l_start= 0; fl.l_len= (ndb_off_t)size; if (::xfsctl(NULL, m_handle, XFS_IOC_RESVSP64, &fl) < 0) { - std::printf("failed to optimally allocate disk space"); + std::printf("failed to optimally allocate disk space\n"); return -1; } return 0; } #endif #ifdef HAVE_POSIX_FALLOCATE + std::printf("Using posix_fallocate to 
allocate disk space" + ", size: %llu\n", size); return ::posix_fallocate(m_handle, 0, size); #else errno = ENOSPC; @@ -535,3 +537,30 @@ int ndb_file::reopen_with_sync(const char name[]) return 0; } + +int ndb_file::init_zero(ndb_off_t data_size, ndb_off_t offset) const +{ /* Zero data_size bytes starting at offset; returns 0 on success, -1 on failure */ +#ifdef HAVE_XFS_XFS_H + if (::platform_test_xfs_fd(m_handle)) + { + std::printf("Using xfsctl(XFS_IOC_ZERO_RANGE) to zero disk space," + " data_size: %lld, offset: %lld\n", (long long)data_size, (long long)offset); + xfs_flock64_t fl; + fl.l_whence= 0; + fl.l_start= (off64_t)offset; + fl.l_len= (off64_t)data_size; + if (::xfsctl(NULL, m_handle, XFS_IOC_ZERO_RANGE, &fl) < 0) + { + std::printf("failed to optimally zero disk space\n"); + return -1; + } + return 0; /* zeroed through XFS, do not fall through to fallocate */ + } +#endif +#ifdef HAVE_FALLOCATE + std::printf("Using fallocate to zero disk space, data_size: %lld," + " offset: %lld\n", (long long)data_size, (long long)offset); + return fallocate(m_handle, FALLOC_FL_ZERO_RANGE, offset, data_size); +#endif + return -1; +} diff --git a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index b650da94cea3..cc0b98657dce 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -2982,7 +2982,8 @@ class Dblqh } public: - void execFSWRITEREQ(const FsReadWriteReq*) const /* called direct cross threads from Ndbfs */; + /* called direct cross threads from Ndbfs */ + Uint32 execFSWRITEREQ(const FsReadWriteReq*) const; void execLQH_WRITELOG_REQ(Signal* signal); void execTUP_ATTRINFO(Signal* signal); void execREAD_PSEUDO_REQ(Uint32 opPtrI, Uint32 attrId, Uint32* out, Uint32 out_words); diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index 482a7a3a3bcd..98971793bfa4 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -25984,7 +25984,7 @@ void Dblqh::openLogfileInit(Signal* signal, LogFileRecordPtr logFilePtr) } 
}//Dblqh::openLogfileInit() -void +Uint32 Dblqh::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross threads from Ndbfs */ { /** @@ -26012,17 +26012,22 @@ Dblqh::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th ptrCheckGuard(currLogPartPtr, clogPartFileSize, logPartRecord); Uint32 page_no = req->varIndex; + Uint32 init_zero = req->data.zeroPageIndicator.initZero; LogPageRecordPtr currLogPagePtr; currLogPagePtr.p = (LogPageRecord*)page_ptr.p; - std::memset(page_ptr.p, 0, sizeof(LogPageRecord)); if (page_no == 0) { // keep writing these afterwards + if (init_zero == 1) + { + std::memset(page_ptr.p, 0, sizeof(LogPageRecord)); + } } else if (((page_no % ZPAGES_IN_MBYTE) == 0) || (page_no == ((clogFileSize * ZPAGES_IN_MBYTE) - 1))) { + std::memset(page_ptr.p, 0, sizeof(LogPageRecord)); currLogPagePtr.p->logPageWord[ZPOS_LOG_LAP] = currLogPartPtr.p->logLap; currLogPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED] = currLogPartPtr.p->logPartNewestCompletedGCI; @@ -26038,13 +26043,24 @@ Dblqh::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th currLogPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL; currLogPagePtr.p->logPageWord[ZPOS_CHECKSUM] = calcPageCheckSum(currLogPagePtr); + return 0; } - else if (0) + else { - currLogPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL; - currLogPagePtr.p->logPageWord[ZPOS_CHECKSUM] = - calcPageCheckSum(currLogPagePtr); + if (init_zero == 1) + { + std::memset(page_ptr.p, 0, sizeof(LogPageRecord)); + currLogPagePtr.p->logPageWord[ZPOS_LOG_LAP] = currLogPartPtr.p->logLap; + currLogPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL; + currLogPagePtr.p->logPageWord[ZPOS_CHECKSUM] = + calcPageCheckSum(currLogPagePtr); + } } + if (init_zero == 0) + { + return 1; + } + return 0; } /* OPEN FOR READ/WRITE, DO CREATE AND DO TRUNCATE FILE */ diff --git a/storage/ndb/src/kernel/blocks/lgman.cpp b/storage/ndb/src/kernel/blocks/lgman.cpp index 0bc847416700..8555171b2b05 100644 --- 
a/storage/ndb/src/kernel/blocks/lgman.cpp +++ b/storage/ndb/src/kernel/blocks/lgman.cpp @@ -1662,7 +1662,7 @@ Lgman::open_file(Signal* signal, * NDBFS from the DataMemory in DBTUP. So these pages we are allowed to * change since they are owned at this moment by the NDB file system thread. */ -void +Uint32 Lgman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross threads from Ndbfs */ { jamNoBlock(); @@ -1674,6 +1674,7 @@ Lgman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th req->fsFormatSharedPage); ndbrequire(m_shared_page_pool.getPtr(page_ptr, req->data.sharedPage.pageNumber)); + Uint32 init_zero = req->data.zeroPageIndicator.initZero; /** * This code is executed when creating a new UNDO logfile group. * In this case we always use the new v2 format. @@ -1701,6 +1702,7 @@ Lgman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th (File_formats::Undofile::Zero_page_v2*)page; page_v2->m_checksum = 0; } + return 0; } else if (req->varIndex == 1) { @@ -1740,9 +1742,14 @@ Lgman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th page->m_data[0] = (File_formats::Undofile::UNDO_END << 16) | 1 ; page->m_page_header.m_page_type = File_formats::PT_Undopage; } + return 0; } else { + if (init_zero == 0) + { + return 2; + } memset(page_ptr.p, 0, sizeof(File_formats::Undofile::Undo_page_v2)); if (v2) { @@ -1771,6 +1778,7 @@ Lgman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th page->m_words_used = 0; } } + return 0; } void @@ -4223,7 +4231,13 @@ Lgman::execFSREADCONF(Signal* signal) client_unlock(number(), __LINE__, this); return; } - else if ((file_ptr.p->m_state & Undofile::FS_EXECUTING) == + + /** + * Handle possible zero pages read and ensure they are correctly initialised + * Should never happen with Page Zero. 
+ */ + jam(); + if ((file_ptr.p->m_state & Undofile::FS_EXECUTING) == Undofile::FS_EXECUTING) { jam(); @@ -4264,13 +4278,31 @@ Lgman::execFSREADCONF(Signal* signal) Ptr<GlobalPage> page_ptr; ndbrequire(m_shared_page_pool.getPtr(page_ptr, file_ptr.p->m_online.m_outstanding)); + File_formats::Undofile::Undo_page_v2* page = + (File_formats::Undofile::Undo_page_v2*)page_ptr.p; + if (page->m_page_header.m_page_type == File_formats::PT_Unallocated) + { + jam(); + page->m_page_header.m_page_lsn_hi = 0; + page->m_page_header.m_page_lsn_lo = 0; + page->m_words_used = 1; + page->m_checksum = 0; + page->m_ndb_version = NDB_DISK_V2; + page->m_unused[0] = 0; + page->m_unused[1] = 0; + page->m_unused[2] = 0; + page->m_unused[3] = 0; + page->m_unused[4] = 0; + page->m_unused[5] = 0; + page->m_data[0] = (File_formats::Undofile::UNDO_END << 16) | 1 ; + page->m_page_header.m_page_type = File_formats::PT_Undopage; + } + file_ptr.p->m_online.m_outstanding= 0; - File_formats::Undofile::Undo_page* page = - (File_formats::Undofile::Undo_page*)page_ptr.p; - Uint64 lsn = 0; - lsn += page->m_page_header.m_page_lsn_hi; lsn <<= 32; + lsn += page->m_page_header.m_page_lsn_hi; + lsn <<= 32; lsn += page->m_page_header.m_page_lsn_lo; switch(file_ptr.p->m_state){ diff --git a/storage/ndb/src/kernel/blocks/lgman.hpp b/storage/ndb/src/kernel/blocks/lgman.hpp index 5bfbd90a887a..359974bb7eec 100644 --- a/storage/ndb/src/kernel/blocks/lgman.hpp +++ b/storage/ndb/src/kernel/blocks/lgman.hpp @@ -1,4 +1,5 @@ /* Copyright (c) 2005, 2023, Oracle and/or its affiliates. + Copyright (c) 2023, 2023, Hopsworks and/or its affiliates. 
This program is free software; you can redistribute it and/or modify @@ -49,7 +50,8 @@ class Lgman : public SimulatedBlock BLOCK_DEFINES(Lgman); public: - void execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross threads from Ndbfs */; + /* called direct cross threads from Ndbfs */ + Uint32 execFSWRITEREQ(const FsReadWriteReq* req) const; protected: void execSTTOR(Signal* signal); void sendSTTORRY(Signal*); diff --git a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp index 9003fb1a90fd..0aa5a8d035d8 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp @@ -51,6 +51,16 @@ //#define DEBUG_ODIRECT +#if (defined(VM_TRACE) || defined(ERROR_INSERT)) +//#define DEBUG_FSWRITEREQ 1 +#endif + +#ifdef DEBUG_FSWRITEREQ +#define DEB_FSWRITEREQ(arglist) do { g_eventLogger->info arglist ; } while (0) +#else +#define DEB_FSWRITEREQ(arglist) do { } while (0) +#endif + AsyncFile::AsyncFile(Ndbfs& fs) : theFileName(), m_thread_bound(false), m_fs(fs) { m_thread = 0; @@ -293,6 +303,12 @@ AsyncFile::openReq(Request * request) { kdf_iter_count = -1; // Use PBKDF2 let ndb_ndbxfrm decide iter count } + DEB_FSWRITEREQ(("File %s data_size: %llu, get_data_size: %llu," + " file_block_size: %lu", + theFileName.c_str(), + data_size, + m_file.get_size(), + file_block_size)); rc = m_xfile.create( m_file, use_gz, @@ -309,6 +325,7 @@ AsyncFile::openReq(Request * request) } else { + DEB_FSWRITEREQ(("File %s opened", theFileName.c_str())); rc = m_xfile.open(m_file, pwd, pwd_len); if (rc < 0) NDBFS_SET_REQUEST_ERROR(request, get_last_os_error()); } @@ -317,9 +334,16 @@ AsyncFile::openReq(Request * request) m_file.close(); goto remove_if_created; } + DEB_FSWRITEREQ(("File %s, get_size: %llu, get_data_size() = %llu", + theFileName.c_str(), + m_file.get_size(), + m_xfile.get_data_size())); if (ndbxfrm_file::is_definite_size(data_size) && !is_data_size_estimated && 
size_t(m_xfile.get_data_size()) != data_size) { + g_eventLogger->info("AsyncFile.cpp wrong size: data_size: %llu" + ", file size: %llu", + data_size, m_xfile.get_data_size()); NDBFS_SET_REQUEST_ERROR(request, FsRef::fsErrInvalidFileSize); m_file.close(); goto remove_if_created; @@ -410,9 +434,22 @@ AsyncFile::openReq(Request * request) m_file.set_autosync(16 * 1024 * 1024); // Reserve disk blocks for whole file + Uint32 init_zero = 1; if (m_file.allocate() == -1) { - // If fail, ignore, will try to write file anyway. + /** + * If fail, ignore, will try to write file anyway. + * Since we were not able to allocate it there is no + * reason to try to initialise it to zero. + */ + } + else if (m_file.init_zero(data_size, m_xfile.get_payload_start()) != -1) + { + init_zero = 0; + } + else + { + ;// If fail to zero, ignore, will try to write file anyway. } // Initialise blocks @@ -442,17 +479,20 @@ AsyncFile::openReq(Request * request) Uint32 page_cnt = (!m_xfile.is_transformed()) ? m_page_cnt : (m_page_cnt - 1); require(page_cnt > 0); - while (off < file_data_size) + int ret_code = 0; + while ((ret_code < 2) && off < file_data_size) { ndb_off_t size = 0; Uint32 cnt = 0; - while (cnt < page_cnt && (off + size) < file_data_size) + ndb_off_t start_offset = off; + while (cnt < page_cnt && (start_offset + size) < file_data_size) { req->filePointer = 0; // DATA 0 req->userPointer = request->theUserPointer; // DATA 2 req->numberOfPages = 1; // DATA 5 req->varIndex = index++; req->operationFlag = 0; + req->data.zeroPageIndicator.initZero = init_zero; FsReadWriteReq::setFormatFlag(req->operationFlag, FsReadWriteReq::fsFormatSharedPage); if (!m_xfile.is_transformed()) @@ -460,8 +500,18 @@ AsyncFile::openReq(Request * request) else req->data.sharedPage.pageNumber = m_page_ptr.i + page_cnt; - m_fs.callFSWRITEREQ(request->theUserReference, req); + ret_code = m_fs.callFSWRITEREQ(request->theUserReference, req); + DEB_FSWRITEREQ(("FSWRITEREQ page %u, ret_code: %u, ref: %x", + index - 
1, + ret_code, + request->theUserReference)); + + off += request->par.open.page_size; + if (ret_code > 0) + { + break; + } if (m_xfile.is_transformed()) { const GlobalPage* src = m_page_ptr.p + page_cnt; @@ -476,11 +526,12 @@ AsyncFile::openReq(Request * request) abort(); } } - cnt++; size += request->par.open.page_size; } - ndb_off_t save_size = size; + DEB_FSWRITEREQ(("FSWRITEREQ write_size %llu, ref: %x", + size, + request->theUserReference)); byte* buf = (byte*)m_page_ptr.p; while (size > 0) { @@ -489,7 +540,7 @@ AsyncFile::openReq(Request * request) #endif int n; ndbxfrm_input_iterator in = {buf, buf + size, false}; - int rc = m_xfile.write_transformed_pages(off, &in); + int rc = m_xfile.write_transformed_pages(start_offset, &in); if (rc == -1) n = -1; else @@ -502,6 +553,7 @@ AsyncFile::openReq(Request * request) } size -= n; buf += n; + start_offset += n; } if (size != 0) { @@ -509,8 +561,6 @@ AsyncFile::openReq(Request * request) m_file.close(); goto remove_if_created; } - require(save_size > 0); - off += save_size; } if (m_file.sync() == -1) { diff --git a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp index 3e30c173fbba..9ed3c5e015d6 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.cpp @@ -2002,7 +2002,7 @@ Ndbfs::get_filename(Uint32 fd) const return ""; } -void Ndbfs::callFSWRITEREQ(BlockReference ref, FsReadWriteReq* req) const +Uint32 Ndbfs::callFSWRITEREQ(BlockReference ref, FsReadWriteReq* req) const { Uint32 block = refToMain(ref); Uint32 instance = refToInstance(ref); @@ -2017,17 +2017,18 @@ void Ndbfs::callFSWRITEREQ(BlockReference ref, FsReadWriteReq* req) const switch (block) { case DBLQH: - static_cast<Dblqh*>(rec_block)->execFSWRITEREQ(req); + return static_cast<Dblqh*>(rec_block)->execFSWRITEREQ(req); break; case TSMAN: - static_cast<Tsman*>(rec_block)->execFSWRITEREQ(req); + return static_cast<Tsman*>(rec_block)->execFSWRITEREQ(req); break; case LGMAN: - 
static_cast<Lgman*>(rec_block)->execFSWRITEREQ(req); + return static_cast<Lgman*>(rec_block)->execFSWRITEREQ(req); break; default: ndbabort(); } + return 0; } #if defined(VM_TRACE) || defined(ERROR_INSERT) || !defined(NDEBUG) diff --git a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp index 8c429cbdce9c..3d681904c73d 100644 --- a/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp +++ b/storage/ndb/src/kernel/blocks/ndbfs/Ndbfs.hpp @@ -1,5 +1,6 @@ /* Copyright (c) 2003, 2023, Oracle and/or its affiliates. + Copyright (c) 2023, 2023, Hopsworks and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, @@ -53,7 +54,7 @@ class Ndbfs : public SimulatedBlock static Uint32 translateErrno(int aErrno); - void callFSWRITEREQ(BlockReference ref, FsReadWriteReq* req) const; + Uint32 callFSWRITEREQ(BlockReference ref, FsReadWriteReq* req) const; protected: BLOCK_DEFINES(Ndbfs); diff --git a/storage/ndb/src/kernel/blocks/tsman.cpp b/storage/ndb/src/kernel/blocks/tsman.cpp index 0726fdf66b65..31ce8108c51b 100644 --- a/storage/ndb/src/kernel/blocks/tsman.cpp +++ b/storage/ndb/src/kernel/blocks/tsman.cpp @@ -1097,7 +1097,7 @@ Tsman::open_file(Signal* signal, return 0; } -void +Uint32 Tsman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross threads from Ndbfs */ { /** @@ -1133,12 +1133,20 @@ Tsman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th Uint32 size = ptr.p->m_extent_size; Uint32 extent_pages = ptr.p->m_create.m_extent_pages; Uint32 datapages = ptr.p->m_create.m_data_pages; + Uint32 init_zero = req->data.zeroPageIndicator.initZero; bool v2 = (ptr.p->m_ndb_version >= NDB_DISK_V2); Uint32 header_words = File_formats::Datafile::extent_header_words(size, v2); Uint32 per_page = File_formats::Datafile::extent_page_words(v2) / header_words; Uint32 extents = datapages/size; + if ((init_zero == 0) && + page_no > 0 
&& + ((page_no - 1) >= extent_pages)) + { + /* No need to write to page, it is already zero:ed */ + return 2; + } client_lock(0); if (page_no == 0) { @@ -1238,6 +1246,7 @@ Tsman::execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross th page_header->m_page_type = File_formats::PT_Unallocated; } client_unlock(0); + return 0; } void @@ -1509,7 +1518,7 @@ Tsman::execFSREADCONF(Signal* signal){ if(page->m_extent_headers_per_page != per_page) break; - osError = 10; + osError = 10; Uint32 extents = page->m_data_pages / ptr.p->m_extent_size; if(page->m_extent_count != extents) break; diff --git a/storage/ndb/src/kernel/blocks/tsman.hpp b/storage/ndb/src/kernel/blocks/tsman.hpp index 73efeea58e70..7ab245fb8693 100644 --- a/storage/ndb/src/kernel/blocks/tsman.hpp +++ b/storage/ndb/src/kernel/blocks/tsman.hpp @@ -1,5 +1,6 @@ /* Copyright (c) 2005, 2023, Oracle and/or its affiliates. + Copyright (c) 2023, 2023, Hopsworks and/or its affiliates. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, @@ -48,7 +49,8 @@ class Tsman : public SimulatedBlock BLOCK_DEFINES(Tsman); public: - void execFSWRITEREQ(const FsReadWriteReq* req) const /* called direct cross threads from Ndbfs */; + /* called direct cross threads from Ndbfs */ + Uint32 execFSWRITEREQ(const FsReadWriteReq* req) const; protected: void execSTTOR(Signal* signal);