diff --git a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference index 20fdcf82c66..d39ecc52c10 100644 --- a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference +++ b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference @@ -1,47 +1,47 @@ check that we have a data -r1 1 1001 3 2 2 -r1 1 2001 1 1 1 -r1 2 1002 1 1 1 -r1 2 2002 1 1 1 -r1 3 1003 2 2 2 -r1 4 1004 2 2 2 -r1 5 2005 2 2 1 -r1 9 1002 1 1 1 -r2 1 1001 3 2 2 -r2 1 2001 1 1 1 -r2 2 1002 1 1 1 -r2 2 2002 1 1 1 -r2 3 1003 2 2 2 -r2 4 1004 2 2 2 -r2 5 2005 2 2 1 -r2 9 1002 1 1 1 +r1 1 1001 3 2 +r1 1 2001 1 1 +r1 2 1002 1 1 +r1 2 2002 1 1 +r1 3 1003 2 2 +r1 4 1004 2 2 +r1 5 2005 2 2 +r1 9 1002 1 1 +r2 1 1001 3 2 +r2 1 2001 1 1 +r2 2 1002 1 1 +r2 2 2002 1 1 +r2 3 1003 2 2 +r2 4 1004 2 2 +r2 5 2005 2 2 +r2 9 1002 1 1 after old OPTIMIZE DEDUPLICATE -r1 1 1001 3 2 2 -r1 1 2001 1 1 1 -r1 2 1002 1 1 1 -r1 2 2002 1 1 1 -r1 3 1003 2 2 2 -r1 4 1004 2 2 2 -r1 5 2005 2 2 1 -r1 9 1002 1 1 1 -r2 1 1001 3 2 2 -r2 1 2001 1 1 1 -r2 2 1002 1 1 1 -r2 2 2002 1 1 1 -r2 3 1003 2 2 2 -r2 4 1004 2 2 2 -r2 5 2005 2 2 1 -r2 9 1002 1 1 1 +r1 1 1001 2 2 +r1 1 2001 1 1 +r1 2 1002 1 1 +r1 2 2002 1 1 +r1 3 1003 2 2 +r1 4 1004 2 2 +r1 5 2005 2 2 +r1 9 1002 1 1 +r2 1 1001 2 2 +r2 1 2001 1 1 +r2 2 1002 1 1 +r2 2 2002 1 1 +r2 3 1003 2 2 +r2 4 1004 2 2 +r2 5 2005 2 2 +r2 9 1002 1 1 check data again after multiple deduplications with new syntax -r1 1 1001 1 1 1 -r1 2 1002 1 1 1 -r1 3 1003 1 1 1 -r1 4 1004 1 1 1 -r1 5 2005 1 1 1 -r1 9 1002 1 1 1 -r2 1 1001 1 1 1 -r2 2 1002 1 1 1 -r2 3 1003 1 1 1 -r2 4 1004 1 1 1 -r2 5 2005 1 1 1 -r2 9 1002 1 1 1 +r1 1 1001 1 1 +r1 2 1002 1 1 +r1 3 1003 1 1 +r1 4 1004 1 1 +r1 5 2005 1 1 +r1 9 1002 1 1 +r2 1 1001 1 1 +r2 2 1002 1 1 +r2 3 1003 1 1 +r2 4 1004 1 1 +r2 5 2005 1 1 +r2 9 1002 1 1 diff --git a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql 
b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql index b10f8b44483..59e349d38f7 100644 --- a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql +++ b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql @@ -3,58 +3,51 @@ --- replicated case -- Just in case if previous tests run left some stuff behind. -DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r1; -DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r2; +DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r1 SYNC; +DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r2 SYNC; SET replication_alter_partitions_sync = 2; -- IRL insert_replica_id were filled from hostname CREATE TABLE IF NOT EXISTS replicated_deduplicate_by_columns_r1 ( - id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock(), insert_replica_id UInt8 MATERIALIZED randConstant() + id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock() ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01581/replicated_deduplicate', 'r1') ORDER BY id; CREATE TABLE IF NOT EXISTS replicated_deduplicate_by_columns_r2 ( - id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock(), insert_replica_id UInt8 MATERIALIZED randConstant() + id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock() ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01581/replicated_deduplicate', 'r2') ORDER BY id; -SYSTEM STOP REPLICATED SENDS; -SYSTEM STOP FETCHES; -SYSTEM STOP REPLICATION QUEUES; - -- insert some data, 2 records: (3, 1003), (4, 1004) are duplicated and have difference in unique_value / insert_replica_id -- (1, 1001), (5, 2005) has full duplicates INSERT INTO replicated_deduplicate_by_columns_r1 VALUES (1, 1001), (1, 1001), (2, 1002), (3, 1003), (4, 1004), (1, 2001), (9, 1002); INSERT INTO replicated_deduplicate_by_columns_r2 VALUES (1, 1001), (2, 2002), (3, 1003), (4, 1004), (5, 2005), (5, 2005); -SYSTEM START REPLICATION 
QUEUES; -SYSTEM START FETCHES; -SYSTEM START REPLICATED SENDS; - --- wait for syncing replicas +-- make sure that all data is present on all replicas SYSTEM SYNC REPLICA replicated_deduplicate_by_columns_r2; SYSTEM SYNC REPLICA replicated_deduplicate_by_columns_r1; SELECT 'check that we have a data'; -SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; -SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; +SELECT 'r1', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; +SELECT 'r2', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; + -- NOTE: here and below we need FINAL to force deduplication in such a small set of data in only 1 part. -- that should remove full duplicates OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE; SELECT 'after old OPTIMIZE DEDUPLICATE'; -SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; -SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; +SELECT 'r1', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; +SELECT 'r2', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY id, val; OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[id, val]'); -OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY 
COLUMNS('[i]') EXCEPT(unique_value, insert_replica_id); +OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[i]') EXCEPT(unique_value); SELECT 'check data again after multiple deduplications with new syntax'; -SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; -SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; +SELECT 'r1', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; +SELECT 'r2', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; -- cleanup the mess -DROP TABLE replicated_deduplicate_by_columns_r1; -DROP TABLE replicated_deduplicate_by_columns_r2; +DROP TABLE replicated_deduplicate_by_columns_r1 SYNC; +DROP TABLE replicated_deduplicate_by_columns_r2 SYNC;