Bug #75763 InnoDB FULLTEXT index reduces insert performance by up to 6x on JSON docs
Submitted: 4 Feb 2015 13:29 Modified: 5 Feb 2015 6:55
Reporter: Justin Swanhart Email Updates:
Status: Verified Impact on me:
None 
Category:MySQL Server: FULLTEXT search Severity:S2 (Serious)
Version:5.6.21, 5.6.24 OS:Any
Assigned to: CPU Architecture:Any
Tags: fulltext, innodb, innodb fulltext, json

[4 Feb 2015 13:29] Justin Swanhart
Description:
I have a set of 18001 JSON documents that are each approximately 4K in size

I want to store them in a MySQL table and FULLTEXT index them, but the insertion performance on the table is too low once the index is added.

The size of the documents is significantly smaller than the buffer pool and DB is provisioned on SSD storage.

How to repeat:
create table json (id serial, doc mediumtext);
Query OK, 0 rows affected (0.00 sec)

load data local infile '/home/justin/Downloads/tmp/companies.json' into table json (doc);
Query OK, 18801 rows affected (2.25 sec) 
Records: 18801 Deleted: 0 Skipped: 0 Warnings: 0

create table json2 like json;
Query OK, 0 rows affected (0.00 sec)

alter table json2 add fulltext(doc);
Query OK, 0 rows affected, 1 warning (0.04 sec)
Records: 0 Duplicates: 0 Warnings: 1

load data local infile '/home/justin/Downloads/tmp/companies.json' into table json2 (doc);
Query OK, 18801 rows affected (12.97 sec) 
Records: 18801 Deleted: 0 Skipped: 0 Warnings: 0

Suggested fix:
unknown
[4 Feb 2015 13:43] Justin Swanhart
You can get the data at the following link.  It is free to public to download, but there is no license information so I am unsure if I am allowed to redistribute it, and it is also too large to attach.

http://jsonstudio.com/wp-content/uploads/2014/02/companies.zip
[4 Feb 2015 13:46] Justin Swanhart
For comparison, MyISAM FTS is 2x faster:

create table json3 like json2;
Query OK, 0 rows affected (0.13 sec)

alter table json3 engine=MyISAM;
Query OK, 0 rows affected (0.00 sec)
Records: 0  Duplicates: 0  Warnings: 0

show create table json3\G
*************************** 1. row ***************************
       Table: json3
Create Table: CREATE TABLE `json3` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `doc` mediumtext,
  UNIQUE KEY `id` (`id`),
  FULLTEXT KEY `doc` (`doc`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
1 row in set (0.00 sec)

load data local infile '/home/justin/Downloads/tmp/companies.json' into table json3 (doc);
Query OK, 18801 rows affected (8.64 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0

I'd like to see InnoDB at least the same speed as MyISAM, but faster than MyISAM if possible.
[5 Feb 2015 6:55] Umesh Shastry
Hello Justin Swanhart,

Thank you for the report.

Thanks,
Umesh
[5 Feb 2015 6:56] Umesh Shastry
// 5.6.24
//Build used

[root@cluster-repo mysql-5.6.24]# more docs/INFO_SRC
commit: caae6b64ef613df007609b569b8368c599d5c995
date: 2015-02-03 12:05:54 +0100
build-date: 2015-02-03 18:45:47 +0100
short: caae6b6
branch: mysql-5.6

MySQL source 5.6.24

mysql> create table json (id serial, doc mediumtext);
Query OK, 0 rows affected (0.01 sec)

mysql> load data local infile '/data/ushastry/server/mysql-5.6.24/companies.json' into table json (doc);
Query OK, 18801 rows affected (3.72 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0

mysql> create table json2 like json;
Query OK, 0 rows affected (0.01 sec)

mysql> alter table json2 add fulltext(doc);
Query OK, 0 rows affected, 1 warning (0.09 sec)
Records: 0  Duplicates: 0  Warnings: 1

mysql> show warnings;
+---------+------+--------------------------------------------------+
| Level   | Code | Message                                          |
+---------+------+--------------------------------------------------+
| Warning |  124 | InnoDB rebuilding table to add column FTS_DOC_ID |
+---------+------+--------------------------------------------------+
1 row in set (0.00 sec)

mysql> load data local infile '/data/ushastry/server/mysql-5.6.24/companies.json' into table json2 (doc);
Query OK, 18801 rows affected (19.77 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0

mysql> -- for myisam
mysql> create table json3 like json2;
Query OK, 0 rows affected (0.07 sec)

mysql> alter table json3 engine=MyISAM;
Query OK, 0 rows affected (0.01 sec)
Records: 0  Duplicates: 0  Warnings: 0

mysql> show create table json3\G
*************************** 1. row ***************************
       Table: json3
Create Table: CREATE TABLE `json3` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `doc` mediumtext,
  UNIQUE KEY `id` (`id`),
  FULLTEXT KEY `doc` (`doc`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
1 row in set (0.00 sec)

mysql> load data local infile '/data/ushastry/server/mysql-5.6.24/companies.json' into table json3 (doc);
Query OK, 18801 rows affected (9.60 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0
[5 Feb 2015 8:18] Umesh Shastry
// 5.7.6
// Build used

commit: 1d5409f947189064bafdbd6c437640fe690ebbbd
date: 2015-02-04 15:27:26 +0100
build-date: 2015-02-04 17:04:31 +0100
short: 1d5409f
branch: mysql-trunk

MySQL source 5.7.6

mysql> create table json (id serial, doc mediumtext);
Query OK, 0 rows affected (0.00 sec)

mysql> load data local infile '/export/umesh/mysql-5.7.6/companies.json' into table json (doc);
Query OK, 18801 rows affected (3.43 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0

mysql> create table json2 like json;
Query OK, 0 rows affected (0.00 sec)

mysql> alter table json2 add fulltext(doc);
Query OK, 0 rows affected, 1 warning (0.09 sec)
Records: 0  Duplicates: 0  Warnings: 1

mysql> show warnings;
+---------+------+--------------------------------------------------+
| Level   | Code | Message                                          |
+---------+------+--------------------------------------------------+
| Warning |  124 | InnoDB rebuilding table to add column FTS_DOC_ID |
+---------+------+--------------------------------------------------+
1 row in set (0.00 sec)

mysql> load data local infile '/export/umesh/mysql-5.7.6/companies.json' into table json2 (doc);
Query OK, 18801 rows affected (14.08 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0

mysql> -- for myisam
mysql> create table json3 like json2;
Query OK, 0 rows affected (0.01 sec)

mysql> alter table json3 engine=MyISAM;
Query OK, 0 rows affected (0.01 sec)
Records: 0  Duplicates: 0  Warnings: 0

mysql> show create table json3\G
*************************** 1. row ***************************
       Table: json3
Create Table: CREATE TABLE `json3` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `doc` mediumtext,
  UNIQUE KEY `id` (`id`),
  FULLTEXT KEY `doc` (`doc`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
1 row in set (0.00 sec)

mysql> load data local infile '/export/umesh/mysql-5.7.6/companies.json' into table json3 (doc);
Query OK, 18801 rows affected (8.64 sec)
Records: 18801  Deleted: 0  Skipped: 0  Warnings: 0