Description:
Hi team,
We saw an unexpected duplicate error when running a select query with group by a json data.
If the internal_tmp_mem_storage_engine is TempTable, the error message will be
ERROR 1022 (23000): Can't write; duplicate key in table '/tmp/#sql1_9_1'
if the internal_tmp_mem_storage_engine is MEMORY, the error message will be
ERROR 1062 (23000): Duplicate entry '3333333333' for key '/tmp/#sql1_a_f.<group_key>'
The query is below
SELECT
JSON_EXTRACT(data2, '$.a') as `a`,
CAST(count(*) AS JSON) as `*`
FROM test.t
WHERE
i < 20
GROUP BY
JSON_EXTRACT(data2, '$.a');
The table structure is
create table t (
id int auto_increment,
data1 json,
data2 json,
`i` bigint GENERATED ALWAYS AS (
json_unquote(json_extract(`data1`,_utf8mb4'$.i'))
) STORED,
`a` varchar(128) GENERATED ALWAYS AS (
json_unquote(json_extract(`data2`,_utf8mb4'$.a'))
) STORED,
primary key (id),
key (a,i),
key i (i)
)ENGINE=innodb;
It uses the index i.
mysql> DESC SELECT JSON_EXTRACT(data2, '$.a') as `a`, CAST(count(*) AS JSON) as `*` FROM test.t WHERE i < 20 GROUP BY JSON_EXTRACT(data2, '$.a')\G
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: t
partitions: NULL
type: range
possible_keys: a,i
key: i
key_len: 9
ref: NULL
rows: 17
filtered: 100.00
Extra: Using index condition; Using temporary
1 row in set, 1 warning (0.01 sec)
We can skip the error by using a different index.
mysql> SELECT JSON_EXTRACT(data2, '$.a') as `a`, CAST(count(data1) AS JSON) as `*` FROM test.t IGNORE KEY (i) WHERE i < 20 GROUP BY JSON_EXTRACT(data2, '$.a');
+--------------+------+
| a | * |
+--------------+------+
| "0000000000" | 1 |
| "1111111111" | 1 |
| "2222222222" | 1 |
| "3333333333" | 2 |
| "4444444444" | 2 |
| "5555555555" | 2 |
| "6666666666" | 2 |
| "7777777777" | 2 |
| "8888888888" | 2 |
| "9999999999" | 2 |
+--------------+------+
10 rows in set (0.00 sec)
How to repeat:
- create table
CREATE DATABASE test;
USE test;
create table t (
id int auto_increment,
data1 json,
data2 json,
`i` bigint GENERATED ALWAYS AS (
json_unquote(json_extract(`data1`,_utf8mb4'$.i'))
) STORED,
`a` varchar(128) GENERATED ALWAYS AS (
json_unquote(json_extract(`data2`,_utf8mb4'$.a'))
) STORED,
primary key (id),
key (a,i),
key i (i)
)ENGINE=innodb;
- add data
INSERT INTO t (data1) values (JSON_OBJECT('i', 1)),(JSON_OBJECT('i', 1)),(JSON_OBJECT('i', 1)),(JSON_OBJECT('i', 1)),(JSON_OBJECT('i', 1));
INSERT INTO t (data1) select data1 from t;
INSERT INTO t (data1) select data1 from t;
INSERT INTO t (data1) select data1 from t;
INSERT INTO t (data1) select data1 from t;
UPDATE t SET
data1 = JSON_OBJECT('i', id),
data2 = JSON_OBJECT('a', repeat(mod(id,10),10));
- run the select query
SELECT
JSON_EXTRACT(data2, '$.a') as `a`,
CAST(count(*) AS JSON) as `*`
FROM test.t
WHERE
i < 20
GROUP BY
JSON_EXTRACT(data2, '$.a');