Bug #53929 | Determine size of CSV tables (log table) | ||
---|---|---|---|
Submitted: | 24 May 2010 0:12 | ||
Reporter: | Adam Dixon | Email Updates: | |
Status: | Verified | Impact on me: | |
Category: | MySQL Server: Information schema | Severity: | S4 (Feature request) |
Version: | 5.1 | OS: | Any |
Assigned to: | CPU Architecture: | Any |
[24 May 2010 0:12]
Adam Dixon
[27 May 2010 4:39]
Roel Van de Paar
----- Workaround: ----- DELIMITER // DROP PROCEDURE IF EXISTS checkcsv// CREATE PROCEDURE checkcsv(IN databasename CHAR(200),IN tablename CHAR(200)) BEGIN SET SESSION group_concat_max_len=10*1024*1024; /* 10Mb buffer for CONCAT_WS */ SELECT GROUP_CONCAT(COLUMN_NAME) INTO @columnames FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA = databasename AND TABLE_NAME = tablename); SET @get_colsizes_stmt = CONCAT("SELECT SUM(CHAR_LENGTH(REPLACE(REPLACE(REPLACE(CONCAT_WS(',',",@columnames,"),UNHEX('0A'),'nn'),UNHEX('22'),'nn'),UNHEX('5C'),'nn'))) INTO @total_length FROM ",databasename,".",tablename,";"); PREPARE get_colsizes FROM @get_colsizes_stmt; EXECUTE get_colsizes; DEALLOCATE PREPARE get_colsizes; SET @get_count_stmt = CONCAT('SELECT COUNT(*) INTO @rowcount FROM ',databasename,'.',tablename,';'); PREPARE get_count FROM @get_count_stmt; EXECUTE get_count; DEALLOCATE PREPARE get_count; SELECT 2*COUNT(COLUMN_NAME) INTO @non_numeric_cols_count FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA = databasename AND TABLE_NAME = tablename AND NUMERIC_SCALE IS NULL); /* Counting quotes */ SET @total_size=@total_length+(@rowcount*@non_numeric_cols_count) /* Adding counted quotes */ +@rowcount /* one LineFeed per row */; SET @avg_row_length=@total_size/@rowcount; SET @output_stmt = CONCAT ("SELECT CONCAT('",databasename,"','.','",tablename,"') AS 'Table', ",@rowcount," AS 'Number Of Rows', ROUND(@avg_row_length) AS 'Average Row Length', ",ROUND(@total_size)," AS 'Total size' FROM ",databasename,".",tablename," LIMIT 1;"); PREPARE outputr FROM @output_stmt; EXECUTE outputr; DEALLOCATE PREPARE outputr; END; // DELIMITER ; ----- ----- Usage Example: ----- mysql> CALL checkcsv("mysql","general_log"); +-------------------+----------------+--------------------+------------+ | Table | Number Of Rows | Average Row Length | Total size | +-------------------+----------------+--------------------+------------+ | mysql.general_log | 53 | 183 | 9673 | +-------------------+----------------+--------------------+------------+ 1 row in set (0.01 sec) Query OK, 0 rows affected (0.06 sec) ----- On-disk files for the same CSV table: 26/05/2010 03:56 PM 35 general_log.CSM 26/05/2010 03:38 PM 9,673 general_log.CSV <--- actual data file, matches the above. 25/05/2010 05:38 PM 8,776 general_log.frm This workaround is provided "as is" without guarantees of any kind. It works correctly for Engine=CSV tables only (in terms of calculating the on-disk-size (total size) and average row length). (When it is used on MyISAM and InnoDB tables, it will output the number of rows correctly, and it will also show the data size and total size, but the values are all based on the actual data inside the table only - not on how the table is rendered on-disk. The actual disk table will be much larger.)