ClickHouse/tests/queries/0_stateless/02124_insert_deduplication_token_replica.sql

-- Exercises insert deduplication on a ReplicatedMergeTree table with two replicas:
-- first without a token (deduplication by data digest), then with an explicit
-- insert_deduplication_token supplied through SET.

-- Start from a clean state in case an earlier run left the tables behind.
DROP TABLE IF EXISTS insert_dedup_token1 SYNC;
DROP TABLE IF EXISTS insert_dedup_token2 SYNC;

SELECT 'create replica 1 and check deduplication';
CREATE TABLE insert_dedup_token1
(
    id Int32,
    val UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/insert_dedup_token', 'r1')
ORDER BY id;

-- No token is set here: the same row is sent twice.
SELECT 'two inserts with exact data, one inserted, one deduplicated by data digest';
INSERT INTO insert_dedup_token1 (id, val) VALUES (1, 1001);
INSERT INTO insert_dedup_token1 (id, val) VALUES (1, 1001);
SELECT id, val FROM insert_dedup_token1 ORDER BY id;

-- Both inserts below run under token '1' although their rows differ.
SELECT 'two inserts with the same dedup token, one inserted, one deduplicated by the token';
SET insert_deduplication_token = '1';
INSERT INTO insert_dedup_token1 (id, val) VALUES (1, 1001);
INSERT INTO insert_dedup_token1 (id, val) VALUES (2, 1002);
SELECT id, val FROM insert_dedup_token1 ORDER BY id;

-- Empty token again; the row (2, 1002) is sent once more.
SELECT 'reset deduplication token and insert new row';
SET insert_deduplication_token = '';
INSERT INTO insert_dedup_token1 (id, val) VALUES (2, 1002);
SELECT id, val FROM insert_dedup_token1 ORDER BY id;

-- Second table: same first engine argument as replica 1, replica name 'r2'.
SELECT 'create replica 2 and check deduplication';
CREATE TABLE insert_dedup_token2
(
    id Int32,
    val UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/insert_dedup_token', 'r2')
ORDER BY id;
-- Sync the new replica before any of the checks below read from it.
SYSTEM SYNC REPLICA insert_dedup_token2;

SELECT 'inserted value deduplicated by data digest, the same result as before';
SET insert_deduplication_token = '';
INSERT INTO insert_dedup_token2 (id, val) VALUES (1, 1001); -- deduplicated by data digest
SELECT id, val FROM insert_dedup_token2 ORDER BY id;

SELECT 'inserted value deduplicated by dedup token, the same result as before';
SET insert_deduplication_token = '1';
INSERT INTO insert_dedup_token2 (id, val) VALUES (3, 1003); -- deduplicated by dedup token
SELECT id, val FROM insert_dedup_token2 ORDER BY id;

SELECT 'new record inserted by providing new deduplication token';
SET insert_deduplication_token = '2';
INSERT INTO insert_dedup_token2 (id, val) VALUES (2, 1002); -- inserted
SELECT id, val FROM insert_dedup_token2 ORDER BY id;

-- Teardown.
DROP TABLE insert_dedup_token1 SYNC;
DROP TABLE insert_dedup_token2 SYNC;
insert_deduplication_token setting for INSERT statement. The setting allows a user to provide their own deduplication semantics in ReplicatedMergeTree. If provided, it is used instead of the data digest to generate the block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. Data inserted within the same INSERT statement is split into blocks according to the insert_block_size* settings (max_insert_block_size, min_insert_block_size_rows, min_insert_block_size_bytes). Each block within the same INSERT statement gets an ordinal number. The ordinal number is appended to insert_deduplication_token to get the block dedup token, i.e. <token>_0, <token>_1, ... Deduplication is done per block. So, to guarantee deduplication for two identical INSERT queries, the dedup token and the number of blocks have to be the same. Issue: #7461 2021-11-21 20:39:42 +00:00			`-- insert data duplicates by providing deduplication token on insert`

			`DROP TABLE IF EXISTS insert_dedup_token1 SYNC;`
			`DROP TABLE IF EXISTS insert_dedup_token2 SYNC;`

			`select 'create replica 1 and check deduplication';`
			`CREATE TABLE insert_dedup_token1 (`
			`id Int32, val UInt32`
			`) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/insert_dedup_token', 'r1') ORDER BY id;`

			`select 'two inserts with exact data, one inserted, one deduplicated by data digest';`
			`INSERT INTO insert_dedup_token1 VALUES(1, 1001);`
			`INSERT INTO insert_dedup_token1 VALUES(1, 1001);`
			`SELECT * FROM insert_dedup_token1 ORDER BY id;`

			`select 'two inserts with the same dedup token, one inserted, one deduplicated by the token';`
			`set insert_deduplication_token = '1';`
			`INSERT INTO insert_dedup_token1 VALUES(1, 1001);`
			`INSERT INTO insert_dedup_token1 VALUES(2, 1002);`
			`SELECT * FROM insert_dedup_token1 ORDER BY id;`

			`select 'reset deduplication token and insert new row';`
			`set insert_deduplication_token = '';`
			`INSERT INTO insert_dedup_token1 VALUES(2, 1002);`
			`SELECT * FROM insert_dedup_token1 ORDER BY id;`

			`select 'create replica 2 and check deduplication';`
			`CREATE TABLE insert_dedup_token2 (`
			`id Int32, val UInt32`
			`) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/insert_dedup_token', 'r2') ORDER BY id;`
Fix test for insert_deduplication_token + sync replica before checks in replica test + use different table names for multi block tests 2022-01-10 10:09:29 +00:00			`SYSTEM SYNC REPLICA insert_dedup_token2;`
insert_deduplication_token setting for INSERT statement. The setting allows a user to provide their own deduplication semantics in ReplicatedMergeTree. If provided, it is used instead of the data digest to generate the block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. Data inserted within the same INSERT statement is split into blocks according to the insert_block_size* settings (max_insert_block_size, min_insert_block_size_rows, min_insert_block_size_bytes). Each block within the same INSERT statement gets an ordinal number. The ordinal number is appended to insert_deduplication_token to get the block dedup token, i.e. <token>_0, <token>_1, ... Deduplication is done per block. So, to guarantee deduplication for two identical INSERT queries, the dedup token and the number of blocks have to be the same. Issue: #7461 2021-11-21 20:39:42 +00:00
			`select 'inserted value deduplicated by data digest, the same result as before';`
			`set insert_deduplication_token = '';`
			`INSERT INTO insert_dedup_token2 VALUES(1, 1001); -- deduplicated by data digest`
			`SELECT * FROM insert_dedup_token2 ORDER BY id;`

			`select 'inserted value deduplicated by dedup token, the same result as before';`
			`set insert_deduplication_token = '1';`
			`INSERT INTO insert_dedup_token2 VALUES(3, 1003); -- deduplicated by dedup token`
			`SELECT * FROM insert_dedup_token2 ORDER BY id;`

			`select 'new record inserted by providing new deduplication token';`
			`set insert_deduplication_token = '2';`
			`INSERT INTO insert_dedup_token2 VALUES(2, 1002); -- inserted`
			`SELECT * FROM insert_dedup_token2 ORDER BY id;`

			`DROP TABLE insert_dedup_token1 SYNC;`
			`DROP TABLE insert_dedup_token2 SYNC;`