mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Fixed test to be less flaky
Also log the expanded list of columns passed from `DEDUPLICATE BY` to the actual deduplication routines.
This commit is contained in:
parent
8c5daf0925
commit
59fc301344
@ -658,6 +658,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
const MergeTreeData::DataPartsVector & parts = future_part.parts;
|
||||
|
||||
LOG_DEBUG(log, "Merging {} parts: from {} to {} into {}", parts.size(), parts.front()->name, parts.back()->name, future_part.type.toString());
|
||||
if (deduplicate)
|
||||
{
|
||||
if (deduplicate_by_columns.empty())
|
||||
LOG_DEBUG(log, "DEDUPLICATE BY all columns");
|
||||
else
|
||||
LOG_DEBUG(log, "DEDUPLICATE BY ('{}')", fmt::join(deduplicate_by_columns, "', '"));
|
||||
}
|
||||
|
||||
auto disk = space_reservation->getDisk();
|
||||
String part_path = data.relative_data_path;
|
||||
|
@ -1045,6 +1045,14 @@ bool StorageMergeTree::optimize(
|
||||
const Names & deduplicate_by_columns,
|
||||
const Context & context)
|
||||
{
|
||||
if (deduplicate)
|
||||
{
|
||||
if (deduplicate_by_columns.empty())
|
||||
LOG_DEBUG(log, "DEDUPLICATE BY all columns");
|
||||
else
|
||||
LOG_DEBUG(log, "DEDUPLICATE BY ('{}')", fmt::join(deduplicate_by_columns, "', '"));
|
||||
}
|
||||
|
||||
String disable_reason;
|
||||
if (!partition && final)
|
||||
{
|
||||
|
@ -1,36 +1,36 @@
|
||||
check that we have a data
|
||||
r1 1 1001 2 1 1
|
||||
r1 1 1001 3 2 2
|
||||
r1 1 2001 1 1 1
|
||||
r1 2 1002 1 1 1
|
||||
r1 2 2002 1 1 1
|
||||
r1 3 1003 2 2 2
|
||||
r1 4 1004 2 2 2
|
||||
r1 5 2005 2 1 1
|
||||
r1 5 2005 2 2 1
|
||||
r1 9 1002 1 1 1
|
||||
r2 1 1001 2 1 1
|
||||
r2 1 1001 3 2 2
|
||||
r2 1 2001 1 1 1
|
||||
r2 2 1002 1 1 1
|
||||
r2 2 2002 1 1 1
|
||||
r2 3 1003 2 2 2
|
||||
r2 4 1004 2 2 2
|
||||
r2 5 2005 2 1 1
|
||||
r2 5 2005 2 2 1
|
||||
r2 9 1002 1 1 1
|
||||
after old OPTIMIZE DEDUPLICATE
|
||||
r1 1 1001 1 1 1
|
||||
r1 1 1001 3 2 2
|
||||
r1 1 2001 1 1 1
|
||||
r1 2 1002 1 1 1
|
||||
r1 2 2002 1 1 1
|
||||
r1 3 1003 2 2 2
|
||||
r1 4 1004 2 2 2
|
||||
r1 5 2005 1 1 1
|
||||
r1 5 2005 2 2 1
|
||||
r1 9 1002 1 1 1
|
||||
r2 1 1001 1 1 1
|
||||
r2 1 1001 3 2 2
|
||||
r2 1 2001 1 1 1
|
||||
r2 2 1002 1 1 1
|
||||
r2 2 2002 1 1 1
|
||||
r2 3 1003 2 2 2
|
||||
r2 4 1004 2 2 2
|
||||
r2 5 2005 1 1 1
|
||||
r2 5 2005 2 2 1
|
||||
r2 9 1002 1 1 1
|
||||
check data again after multiple deduplications with new syntax
|
||||
r1 1 1001 1 1 1
|
||||
|
@ -10,46 +10,51 @@ SET replication_alter_partitions_sync = 2;
|
||||
|
||||
-- In real life, insert_replica_id would be filled from the hostname
|
||||
CREATE TABLE IF NOT EXISTS replicated_deduplicate_by_columns_r1 (
|
||||
id Int32, val UInt32, insert_time_ns DateTime64(9) MATERIALIZED now64(9), insert_replica_id UInt8 MATERIALIZED randConstant()
|
||||
id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock(), insert_replica_id UInt8 MATERIALIZED randConstant()
|
||||
) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01581/replicated_deduplicate', 'r1') ORDER BY id;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS replicated_deduplicate_by_columns_r2 (
|
||||
id Int32, val UInt32, insert_time_ns DateTime64(9) MATERIALIZED now64(9), insert_replica_id UInt8 MATERIALIZED randConstant()
|
||||
id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock(), insert_replica_id UInt8 MATERIALIZED randConstant()
|
||||
) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01581/replicated_deduplicate', 'r2') ORDER BY id;
|
||||
|
||||
-- insert some data, 2 records: (3, 1003), (4, 1004) are duplicated and have difference in insert_time_ns / insert_replica_id
|
||||
|
||||
SYSTEM STOP REPLICATED SENDS;
|
||||
SYSTEM STOP FETCHES;
|
||||
SYSTEM STOP REPLICATION QUEUES;
|
||||
|
||||
-- insert some data; 2 records, (3, 1003) and (4, 1004), are duplicated and differ only in unique_value / insert_replica_id
|
||||
-- (1, 1001), (5, 2005) have full duplicates
|
||||
INSERT INTO replicated_deduplicate_by_columns_r1 VALUES (1, 1001), (1, 1001), (2, 1002), (3, 1003), (4, 1004), (1, 2001), (9, 1002);
|
||||
INSERT INTO replicated_deduplicate_by_columns_r2 VALUES (2, 2002), (3, 1003), (4, 1004), (5, 2005), (5, 2005);
|
||||
INSERT INTO replicated_deduplicate_by_columns_r2 VALUES (1, 1001), (2, 2002), (3, 1003), (4, 1004), (5, 2005), (5, 2005);
|
||||
|
||||
SYSTEM START REPLICATION QUEUES;
|
||||
SYSTEM START FETCHES;
|
||||
SYSTEM START REPLICATED SENDS;
|
||||
|
||||
-- wait for syncing replicas
|
||||
SYSTEM SYNC REPLICA replicated_deduplicate_by_columns_r2;
|
||||
SYSTEM SYNC REPLICA replicated_deduplicate_by_columns_r1;
|
||||
|
||||
SELECT 'check that we have a data';
|
||||
SELECT 'r1', id, val, count(), uniqExact(insert_time_ns), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r2', id, val, count(), uniqExact(insert_time_ns), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val;
|
||||
|
||||
-- NOTE: here and below we need FINAL to force deduplication in such a small set of data in only 1 part.
|
||||
-- that should remove full duplicates
|
||||
OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE;
|
||||
|
||||
SELECT 'after old OPTIMIZE DEDUPLICATE';
|
||||
SELECT 'r1', id, val, count(), uniqExact(insert_time_ns), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r2', id, val, count(), uniqExact(insert_time_ns), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val;
|
||||
|
||||
OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY id, val;
|
||||
OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[id, val]');
|
||||
OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY * EXCEPT(insert_time_ns, insert_replica_id);
|
||||
OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[i]') EXCEPT(insert_time_ns, insert_replica_id);
|
||||
OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[i]') EXCEPT(unique_value, insert_replica_id);
|
||||
|
||||
SELECT 'check data again after multiple deduplications with new syntax';
|
||||
SELECT 'r1', id, val, count(), uniqExact(insert_time_ns), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r2', id, val, count(), uniqExact(insert_time_ns), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val;
|
||||
SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val;
|
||||
|
||||
-- clean up the mess
|
||||
DROP TABLE replicated_deduplicate_by_columns_r1;
|
||||
DROP TABLE replicated_deduplicate_by_columns_r2;
|
||||
|
||||
SYSTEM DROP REPLICA '/clickhouse/tables/test_01581/replicated_deduplicate/replicas/r1';
|
||||
SYSTEM DROP REPLICA '/clickhouse/tables/test_01581/replicated_deduplicate/replicas/r2';
|
||||
|
Loading…
Reference in New Issue
Block a user