diff --git a/extra/innochecksum.c b/extra/innochecksum.c index b55b510..8244239 100644 --- a/extra/innochecksum.c +++ b/extra/innochecksum.c @@ -32,6 +32,7 @@ #include #include #include +#include /* all of these ripped from InnoDB code from MySQL 4.0.22 */ #define UT_HASH_RANDOM_MASK 1463735687 @@ -39,10 +40,17 @@ #define FIL_PAGE_LSN 16 #define FIL_PAGE_FILE_FLUSH_LSN 26 #define FIL_PAGE_OFFSET 4 +#define FIL_PAGE_TYPE 24 #define FIL_PAGE_DATA 38 #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 #define FIL_PAGE_SPACE_OR_CHKSUM 0 -#define UNIV_PAGE_SIZE (2 * 8192) +#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 + +#define PAGE_ZIP_MIN_SIZE_SHIFT 10 +#define PAGE_ZIP_MIN_SIZE (1 << PAGE_ZIP_MIN_SIZE_SHIFT) +#define FSP_SPACE_FLAGS 16 +#define DICT_TF_ZSSIZE_SHIFT 1 +#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT) /* 4-bit zip size field; find_page_size() extracts it from the space flags */ /* command line argument to do page checks (that's it) */ /* another argument to specify page ranges... seek to right spot and go from there */ @@ -94,7 +102,8 @@ ulint buf_calc_page_new_checksum( /*=======================*/ /* out: checksum */ - uchar* page) /* in: buffer page */ + uchar* page, /* in: buffer page */ + ulint page_size) /* in: page size in bytes */ { ulint checksum; @@ -108,7 +117,7 @@ buf_calc_page_new_checksum( checksum= ut_fold_binary(page + FIL_PAGE_OFFSET, FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) + ut_fold_binary(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA + page_size - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM); checksum= checksum & 0xFFFFFFFF; @@ -130,6 +139,157 @@ buf_calc_page_old_checksum( return(checksum); } +/**********************************************************************//** +Calculate the compressed page checksum. +@return page checksum */ +ulint +page_zip_calc_checksum( +/*===================*/ + const void* data, /*!< in: compressed page */ + ulint size) /*!< in: size of compressed page */ +{ + /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN, + and FIL_PAGE_FILE_FLUSH_LSN from the checksum. 
*/ + const Bytef* s = (const Bytef*)data; + uLong adler; + + adler = adler32(0L, s + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - FIL_PAGE_OFFSET); + adler = adler32(adler, s + FIL_PAGE_TYPE, 2); + adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + + return((ulint) adler); +} + /* Compare the 4 bytes at FIL_PAGE_LSN + 4 with the 4 bytes at page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4. Returns nonzero when the two values are equal. With debug set, prints both values and warns when they match on a page flagged as compressed. */ +int lsn_match(uchar* p, ulint page_no, ulint page_size, + int compressed, int debug) { + ulint logseq, logseqfield; + logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4); + logseqfield= mach_read_from_4(p + page_size - + FIL_PAGE_END_LSN_OLD_CHKSUM + 4); + if (debug) { + printf("page %lu: log sequence number: first = %lu; second = %lu\n", + page_no, logseq, logseqfield); + if (compressed && (logseq == logseqfield)) + printf("WARNING: lsns should not always match for compressed pages!\n"); + } + return logseq == logseqfield; +} + /* Return 1 when page p passes its checks, 0 otherwise. Compressed: the value stored at FIL_PAGE_SPACE_OR_CHKSUM must equal page_zip_calc_checksum(p, page_size); lsn_match() is not consulted. Uncompressed: lsn_match() must succeed, the field at page_size - FIL_PAGE_END_LSN_OLD_CHKSUM must equal either the value at FIL_PAGE_LSN or the old-style checksum, and the field at FIL_PAGE_SPACE_OR_CHKSUM must be 0 or equal the new-style checksum. debug enables printing of the calculated and recorded values. */ +int checksum_match(uchar* p, ulint page_no, ulint page_size, + int compressed, int debug) { + ulint csum, csumfield, oldcsumfield, oldcsum; + if (compressed) { + csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM); + oldcsum = page_zip_calc_checksum(p, page_size); + if (debug) + printf("page %lu: oldcsum = %lu; recorded = %lu\n", + page_no, oldcsum, csumfield); + return oldcsum == csumfield; + } else { + /* check the "stored log sequence numbers" */ + if (!lsn_match(p, page_no, page_size, 0, debug)) + { + return 0; + } + /* check old method of checksumming */ + oldcsum= buf_calc_page_old_checksum(p); + oldcsumfield= mach_read_from_4(p + page_size - FIL_PAGE_END_LSN_OLD_CHKSUM); + if (debug) + printf("page %lu: old style: calculated = %lu; recorded = %lu\n", + page_no, oldcsum, oldcsumfield); + if (oldcsumfield != mach_read_from_4(p + FIL_PAGE_LSN) && + oldcsumfield != oldcsum) + { + return 0; + } + /* now check the new method */ + csum= buf_calc_page_new_checksum(p, page_size); + csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM); + if (debug) + printf("page %lu: new style: calculated = %lu; recorded = %lu\n", 
page_no, csum, csumfield); + if (csumfield != 0 && csum != csumfield) + { + return 0; + } + return 1; + } +} + /* Work out the page size and whether the tablespace in f is compressed. Reads the first PAGE_ZIP_MIN_SIZE bytes and extracts the zip size field from the flags at FIL_PAGE_DATA + FSP_SPACE_FLAGS. Nonzero field: *compressed is set to 1 and the size is (PAGE_ZIP_MIN_SIZE >> 1) << zip_ssize; a nonzero *page_size passed in must equal it. Zero field: *compressed is set to 0; a nonzero *page_size is accepted as is, otherwise sizes from 1K to 64K are tried until the first page passes checksum_match(). The stream is rewound after each read. Returns 1 on success, 0 on failure (with a message on stderr). */ +int find_page_size(FILE *f, ulint *page_size, int *compressed, int debug) +{ + uchar *p = (uchar*)malloc(PAGE_ZIP_MIN_SIZE); /* buffer to read data; NOTE(review): malloc result is not checked for NULL */ + ulint psize; + int bytes; + ulint flags; + ulint zip_ssize; + bytes= fread(p, 1, PAGE_ZIP_MIN_SIZE, f); + rewind(f); + if (bytes != PAGE_ZIP_MIN_SIZE) { + fprintf(stderr, + "Error in reading the first %d bytes of the ibd file.\n", + PAGE_ZIP_MIN_SIZE); + free(p); + return 0; + } + + flags = mach_read_from_4(p + FIL_PAGE_DATA + FSP_SPACE_FLAGS); + zip_ssize = (flags & DICT_TF_ZSSIZE_MASK) >> DICT_TF_ZSSIZE_SHIFT; + if (zip_ssize) { /* table is compressed */ + free(p); + *compressed = 1; + if (!*page_size) + *page_size = ((ulint)PAGE_ZIP_MIN_SIZE >> 1) << zip_ssize; + if (*page_size != ((ulint)PAGE_ZIP_MIN_SIZE >> 1) << zip_ssize) { + fprintf(stderr, "Wrong page size is specified.\n" + "actual page size = %luK\n" + "specified page size = %luK\n" + "You can skip this option and innochecksum will " + "automatically determine the page size.\n", + (((ulint)PAGE_ZIP_MIN_SIZE >> 1) << zip_ssize) >> 10, + (*page_size) >> 10); + return 0; + } + return 1; + } + + *compressed = 0; + if (*page_size) { /* caller supplied a size; it is accepted without probing */ + free(p); + return 1; + } + + for (psize = 1024; psize < (1024 << 7); psize <<= 1) { /* candidates 1K, 2K, ... 64K */ + if (debug) + printf("checking if page_size is %luK\n", psize >> 10); + p = (uchar*)realloc(p, psize); /* NOTE(review): realloc result is assigned straight to p and not checked for NULL */ + bytes= fread(p, 1, psize, f); + rewind(f); + + if (bytes != (int)psize) { /* NOTE(review): the two adjacent literals below concatenate with no space after the first sentence */ + fprintf(stderr, "Error in reading the first %lu bytes of the ibd file." + "It may be that file is corrupt. 
You can also try " + "specifying page size (-b ).\n", psize); + free(p); + return 0; + } + + if (checksum_match(p, 0, psize, 0, debug)) { + if (debug) + printf("table has page size %lu and is uncompressed\n", psize); + *page_size = psize; + free(p); + return 1; + } + } + + fprintf(stderr, "Page size can not be determined for the table\n"); + free(p); + return 0; +} + int main(int argc, char **argv) { @@ -150,9 +310,11 @@ int main(int argc, char **argv) int debug= 0; int c; int fd; + int compressed = 0; + ulint page_size = 0; /* remove arguments */ - while ((c= getopt(argc, argv, "cvds:e:p:")) != -1) + while ((c= getopt(argc, argv, "cvds:e:p:b:")) != -1) { switch (c) { @@ -177,6 +339,18 @@ int main(int argc, char **argv) case 'd': debug= 1; break; + case 'b': + page_size = strtoul(optarg, NULL, 0); + if (page_size & (page_size - 1)) { + fprintf(stderr, "page_size (option -b) must be a power of 2\n"); + return 1; + } + if (page_size > 64) { + fprintf(stderr, "page_size (option -b) can be at most 64 KB\n"); + return 1; + } + page_size <<= 10; + break; case ':': fprintf(stderr, "option -%c requires an argument\n", optopt); return 1; @@ -211,8 +385,26 @@ int main(int argc, char **argv) perror("error statting file"); return 1; } + + /* open the file for reading */ + f= fopen(argv[optind], "r"); + if (!f) + { + perror("error opening file"); + return 1; + } + + if (!find_page_size(f, &page_size, &compressed, debug)) { + fprintf(stderr, "error in determining the page size and/or" + " whether the table is in compressed format\n"); + return 1; + } + + printf("Table is %s\n", (compressed ? "compressed" : "not compressed")); + printf("%s size is %luK\n", (compressed ? 
"Key block" : "Page"), page_size >> 10); + size= st.st_size; - pages= size / UNIV_PAGE_SIZE; + pages= size / page_size; /* integer division: a trailing partial page is not counted */ if (just_count) { printf("%lu\n", pages); @@ -242,7 +434,7 @@ int main(int argc, char **argv) return 1; } - offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE; + offset= (off_t)start_page * (off_t)page_size; /* byte offset of start_page */ if (lseek(fd, offset, SEEK_SET) != offset) { @@ -252,51 +444,23 @@ int main(int argc, char **argv) } /* allocate buffer for reading (so we don't realloc every time) */ - p= (uchar *)malloc(UNIV_PAGE_SIZE); + p= (uchar *)malloc(page_size); /* main checksumming loop */ ct= start_page; lastt= 0; while (!feof(f)) { - bytes= fread(p, 1, UNIV_PAGE_SIZE, f); + bytes= fread(p, 1, page_size, f); if (!bytes && feof(f)) return 0; - if (bytes != UNIV_PAGE_SIZE) - { - fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE); - return 1; - } - - /* check the "stored log sequence numbers" */ - logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4); - logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); - if (debug) - printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); - if (logseq != logseqfield) - { - fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct); - return 1; - } - - /* check old method of checksumming */ - oldcsum= buf_calc_page_old_checksum(p); - oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); - if (debug) - printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); - if (oldcsumfield != mach_read_from_4(p + FIL_PAGE_LSN) && oldcsumfield != oldcsum) + if (bytes != page_size) { - fprintf(stderr, "page %lu invalid (fails old style checksum)\n", ct); + fprintf(stderr, "bytes read (%d) doesn't match the page size (%lu)\n", bytes, page_size); return 1; } - /* now check the new method */ - csum= buf_calc_page_new_checksum(p); - csumfield= mach_read_from_4(p 
+ FIL_PAGE_SPACE_OR_CHKSUM); - if (debug) - printf("page %lu: new style: calculated = %lu; recorded = %lu\n", ct, csum, csumfield); - if (csumfield != 0 && csum != csumfield) - { - fprintf(stderr, "page %lu invalid (fails new style checksum)\n", ct); + if (!checksum_match(p, ct, page_size, compressed, debug)) { /* all per-page checks now go through checksum_match() */ + fprintf(stderr, "page %lu invalid\n", ct); return 1; }