Description:
I am Using 5.1.35-community-log and also tried on 5.4.1-beta-community with Windows XP OS
When i insert into InnoDB Tables with the auto_increment field by passing the null values, the data inserted is not correct.
whereas the data looks correct with MYISAM engine.
i need to attach the flat files.
How to repeat:
First we need to create 2 tables,
CREATE TABLE product_dim
( product_sk INT NOT NULL AUTO_INCREMENT PRIMARY KEY
, product_code INT
, product_name CHAR(30)
, product_category CHAR(30)
, effective_date DATE
, expiry_date DATE ) engine=INNODB
;
CREATE TABLE product_stg
( product_code INT
, product_name CHAR(30)
, product_category CHAR(30))engine=INNODB
;
then we have to load data using the following script
/*********************************************************************/
/* */
/* load_product_stg.sql */
/* */
/*********************************************************************/
/* clean up the staging table */
TRUNCATE product_stg;
/* use LOAD DATA INFILE */
LOAD DATA INFILE 'product1.txt'
INTO TABLE product_stg
FIELDS TERMINATED BY ''
OPTIONALLY ENCLOSED BY ''
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES
( product_code
, product_name
, product_category )
;
then we need to run another script which gets the data from product_stg table
into the product_dim
/*********************************************************************/
/* */
/* scd2.sql */
/* */
/*********************************************************************/
/* expire the existing product */
UPDATE
product_dim a
, product_stg b
SET
expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
a.product_code = b.product_code
AND ( a.product_name <> b.product_name
OR a.product_category <> b.product_category )
AND expiry_date = '9999-12-31'
;
/* add a new row for the changing product */
INSERT INTO product_dim
SELECT
NULL
, b.product_code
, b.product_name
, b.product_category
, CURRENT_DATE
, '9999-12-31'
FROM
product_dim a
, product_stg b
WHERE
a.product_code = b.product_code
AND ( a.product_name <> b.product_name
OR a.product_category <> b.product_category )
AND EXISTS
( SELECT * FROM product_dim x
WHERE b.product_code = x.product_code
AND a.expiry_date = SUBDATE(CURRENT_DATE, 1) )
AND NOT EXISTS
( SELECT *
FROM product_dim y
WHERE b.product_code = y.product_code
AND y.expiry_date = '9999-12-31' )
;
/* add new product */
INSERT INTO product_dim
SELECT
NULL
, product_code
, product_name
, product_category
, CURRENT_DATE
, '9999-12-31'
FROM product_stg
WHERE product_code NOT IN(
SELECT y.product_code
FROM product_dim x, product_stg y
WHERE x.product_code = y.product_code )
;
/* end of script */
we will have 2 rows in both the tables, now i would execute the load_product_stg script again, to add one more record and a change in one of the records.(which will be loaded from product2.txt)
this is because i want to apply slowly changing dimensions type 2 to my product_dim table.
so the script,
/*********************************************************************/
/* */
/* load_product_stg.sql */
/* */
/*********************************************************************/
/* clean up the staging table */
TRUNCATE product_stg;
/* use LOAD DATA INFILE */
LOAD DATA INFILE 'product2.txt'
INTO TABLE product_stg
FIELDS TERMINATED BY ''
OPTIONALLY ENCLOSED BY ''
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES
( product_code
, product_name
, product_category )
;
/* end of script */
now we will have 3 records in product_stg;
and when we execute the scd2 script
/*********************************************************************/
/* */
/* scd2.sql */
/* */
/*********************************************************************/
/* expire the existing product */
UPDATE
product_dim a
, product_stg b
SET
expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
a.product_code = b.product_code
AND ( a.product_name <> b.product_name
OR a.product_category <> b.product_category )
AND expiry_date = '9999-12-31'
;
/* add a new row for the changing product */
INSERT INTO product_dim
SELECT
NULL
, b.product_code
, b.product_name
, b.product_category
, CURRENT_DATE
, '9999-12-31'
FROM
product_dim a
, product_stg b
WHERE
a.product_code = b.product_code
AND ( a.product_name <> b.product_name
OR a.product_category <> b.product_category )
AND EXISTS
( SELECT * FROM product_dim x
WHERE b.product_code = x.product_code
AND a.expiry_date = SUBDATE(CURRENT_DATE, 1) )
AND NOT EXISTS
( SELECT *
FROM product_dim y
WHERE b.product_code = y.product_code
AND y.expiry_date = '9999-12-31' )
;
/* add new product */
INSERT INTO product_dim
SELECT
NULL
, product_code
, product_name
, product_category
, CURRENT_DATE
, '9999-12-31'
FROM product_stg
WHERE product_code NOT IN(
SELECT y.product_code
FROM product_dim x, product_stg y
WHERE x.product_code = y.product_code )
;
/* end of script */
we should have 4 rows in product_dim with the values of the product_sk being
1
2
3
4
but the values for product_sk are
1
2
4
5
for some reason it is skipping the number 3, the remaining data looks good
can u please look into this wired behavior.
Note:
-----
1. Tried it with MYISAM Engine the output is correct