From 4d8513024c4bc3dc9d583b1b03ab670d2bc9e63f Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 27 Dec 2022 20:12:09 +0100 Subject: [PATCH 1/8] fix bug that async blocks cleanup not work --- .../ReplicatedMergeTreeCleanupThread.cpp | 12 ++++---- .../ReplicatedMergeTreeCleanupThread.h | 2 +- ...5_cleanup_async_insert_block_ids.reference | 0 .../02515_cleanup_async_insert_block_ids.sh | 30 +++++++++++++++++++ 4 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.reference create mode 100755 tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 315f471fd5c..0957ccddb2d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -328,7 +328,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ auto zookeeper = storage.getZooKeeper(); std::vector timed_blocks; - getBlocksSortedByTime(*zookeeper, timed_blocks); + getBlocksSortedByTime(blocks_dir_name, *zookeeper, timed_blocks); if (timed_blocks.empty()) return; @@ -391,14 +391,14 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ } -void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks) +void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks) { timed_blocks.clear(); Strings blocks; Coordination::Stat stat; - if (Coordination::Error::ZOK != zookeeper.tryGetChildren(storage.zookeeper_path + "/blocks", blocks, &stat)) - throw Exception(storage.zookeeper_path + "/blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE); + if (Coordination::Error::ZOK != zookeeper.tryGetChildren(storage.zookeeper_path + "/" + blocks_dir_name, blocks, &stat)) + throw Exception(ErrorCodes::NOT_FOUND_NODE, "{}/{} doesn't exist", storage.zookeeper_path, blocks_dir_name); /// Seems like this code is obsolete, because we delete blocks from cache /// when they are deleted from zookeeper. But we don't know about all (maybe future) places in code @@ -417,7 +417,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & auto not_cached_blocks = stat.numChildren - cached_block_stats.size(); if (not_cached_blocks) { - LOG_TRACE(log, "Checking {} blocks ({} are not cached){}", stat.numChildren, not_cached_blocks, " to clear old ones from ZooKeeper."); + LOG_TRACE(log, "Checking {} {} ({} are not cached){}", stat.numChildren, blocks_dir_name, not_cached_blocks, " to clear old ones from ZooKeeper."); } std::vector exists_paths; @@ -427,7 +427,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & if (it == cached_block_stats.end()) { /// New block. Fetch its stat asynchronously. - exists_paths.emplace_back(storage.zookeeper_path + "/blocks/" + block); + exists_paths.emplace_back(storage.zookeeper_path + "/" + blocks_dir_name + "/" + block); } else { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index f8731ca0f43..d02b47accd7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -63,7 +63,7 @@ private: struct NodeWithStat; /// Returns list of blocks (with their stat) sorted by ctime in descending order. - void getBlocksSortedByTime(zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks); + void getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks); /// TODO Removing old quorum/failed_parts }; diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.reference b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh new file mode 100755 index 00000000000..c871b7dfd05 --- /dev/null +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-parallel, long, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Check that if the background cleanup thread works correctly. +CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -n --query " + DROP TABLE IF EXISTS t_async_insert_cleanup NO DELAY; + CREATE TABLE t_async_insert_cleanup ( + KeyID UInt32 + ) Engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup', '{replica}') + ORDER BY (KeyID) SETTINGS cleanup_delay_period = 1, cleanup_delay_period_random_add = 1, replicated_deduplication_window_for_async_inserts=10 +" + +for i in {1..100}; do + $CLICKHOUSE_CLIENT --async_insert 1 --async_insert_deduplicate 1 --query "insert into t_async_insert_cleanup values ($i), ($((i + 1))), ($((i + 2)))" +done + +while true; do + answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") + if [ $answer == '10' ]; then + $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" + exit 0 + fi + sleep 1 +done From 7bf4c0a3862c696d0d41d0777e1ece98a92cadbe Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 28 Dec 2022 13:16:35 +0100 Subject: [PATCH 2/8] make test faster --- .../queries/0_stateless/02515_cleanup_async_insert_block_ids.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index c871b7dfd05..8fe0ff38150 100755 --- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT -n --query " " for i in {1..100}; do - $CLICKHOUSE_CLIENT --async_insert 1 --async_insert_deduplicate 1 --query "insert into t_async_insert_cleanup values ($i), ($((i + 1))), ($((i + 2)))" + $CLICKHOUSE_CLIENT --async_insert 1 --async_insert_deduplicate 1 --wait_for_async_insert 0 --query "insert into t_async_insert_cleanup values ($i), ($((i + 1))), ($((i + 2)))" done while true; do From 454f40ab1737aaa5b34adff59bba08389efecdc6 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 30 Dec 2022 16:30:33 +0100 Subject: [PATCH 3/8] fix test --- .../MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 10 +++++----- .../MergeTree/ReplicatedMergeTreeCleanupThread.h | 9 +++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 0957ccddb2d..9dff71106a5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -75,8 +75,8 @@ void ReplicatedMergeTreeCleanupThread::iterate() { clearOldLogs(); auto storage_settings = storage.getSettings(); - clearOldBlocks("blocks", storage_settings->replicated_deduplication_window_seconds, storage_settings->replicated_deduplication_window); - clearOldBlocks("async_blocks", storage_settings->replicated_deduplication_window_seconds_for_async_inserts, storage_settings->replicated_deduplication_window_for_async_inserts); + clearOldBlocks("blocks", storage_settings->replicated_deduplication_window_seconds, storage_settings->replicated_deduplication_window, cached_block_stats_for_sync_inserts); + clearOldBlocks("async_blocks", storage_settings->replicated_deduplication_window_seconds_for_async_inserts, storage_settings->replicated_deduplication_window_for_async_inserts, cached_block_stats_for_async_inserts); clearOldMutations(); storage.clearEmptyParts(); } @@ -323,12 +323,12 @@ struct ReplicatedMergeTreeCleanupThread::NodeWithStat } }; -void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size) +void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats) { auto zookeeper = storage.getZooKeeper(); std::vector timed_blocks; - getBlocksSortedByTime(blocks_dir_name, *zookeeper, timed_blocks); + getBlocksSortedByTime(blocks_dir_name, *zookeeper, timed_blocks, cached_block_stats); if (timed_blocks.empty()) return; @@ -391,7 +391,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_dir_ } -void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks) +void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks, NodeCTimeAndVersionCache & cached_block_stats) { timed_blocks.clear(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index d02b47accd7..35838625bbe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -52,18 +52,19 @@ private: const std::unordered_map & log_pointers_candidate_lost_replicas, size_t replicas_count, const zkutil::ZooKeeperPtr & zookeeper); + using NodeCTimeAndVersionCache = std::map>; /// Remove old block hashes from ZooKeeper. This is done by the leader replica. - void clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size); + void clearOldBlocks(const String & blocks_dir_name, UInt64 window_seconds, UInt64 window_size, NodeCTimeAndVersionCache & cached_block_stats); /// Remove old mutations that are done from ZooKeeper. This is done by the leader replica. void clearOldMutations(); - using NodeCTimeAndVersionCache = std::map>; - NodeCTimeAndVersionCache cached_block_stats; + NodeCTimeAndVersionCache cached_block_stats_for_sync_inserts; + NodeCTimeAndVersionCache cached_block_stats_for_async_inserts; struct NodeWithStat; /// Returns list of blocks (with their stat) sorted by ctime in descending order. - void getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks); + void getBlocksSortedByTime(const String & blocks_dir_name, zkutil::ZooKeeper & zookeeper, std::vector & timed_blocks, NodeCTimeAndVersionCache & cached_block_stats); /// TODO Removing old quorum/failed_parts }; From f42295789172757d88d42b66dc00a5a89c5e6bea Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 31 Dec 2022 21:21:03 +0100 Subject: [PATCH 4/8] output debug info when test fails --- .../0_stateless/02515_cleanup_async_insert_block_ids.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index 8fe0ff38150..4c83c5bb286 100755 --- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -20,7 +20,7 @@ for i in {1..100}; do $CLICKHOUSE_CLIENT --async_insert 1 --async_insert_deduplicate 1 --wait_for_async_insert 0 --query "insert into t_async_insert_cleanup values ($i), ($((i + 1))), ($((i + 2)))" done -while true; do +for i in {1..300}; do answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") if [ $answer == '10' ]; then $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" @@ -28,3 +28,5 @@ while true; do fi sleep 1 done + +$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") From a0d024b8dfea322bdaaee17fedf0e32850c53006 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 1 Jan 2023 12:55:25 +0100 Subject: [PATCH 5/8] fix shell check --- .../queries/0_stateless/02515_cleanup_async_insert_block_ids.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index 4c83c5bb286..2ce808074be 100755 --- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -29,4 +29,4 @@ for i in {1..300}; do sleep 1 done -$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") +$CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'" From 51039517b42b03f2e77ad8b73ec4c9c082a1de8e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 1 Jan 2023 15:22:18 +0100 Subject: [PATCH 6/8] print more logs for debugging --- .../0_stateless/02515_cleanup_async_insert_block_ids.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index 2ce808074be..bb35d2c18b5 100755 --- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -20,6 +20,10 @@ for i in {1..100}; do $CLICKHOUSE_CLIENT --async_insert 1 --async_insert_deduplicate 1 --wait_for_async_insert 0 --query "insert into t_async_insert_cleanup values ($i), ($((i + 1))), ($((i + 2)))" done +sleep 1 + +old_answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") + for i in {1..300}; do answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") if [ $answer == '10' ]; then @@ -29,4 +33,7 @@ for i in {1..300}; do sleep 1 done +$CLICKHOUSE_CLIENT --query "SELECT count(*) FROM t_async_insert_cleanup" +echo $old_answer $CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'" +$CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" From eef0136415bc7ccac449ac93f93b20d61ac1639f Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 2 Jan 2023 11:59:44 +0100 Subject: [PATCH 7/8] add debug log --- src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 9dff71106a5..1c667b1c867 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -417,7 +417,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(const String & bloc auto not_cached_blocks = stat.numChildren - cached_block_stats.size(); if (not_cached_blocks) { - LOG_TRACE(log, "Checking {} {} ({} are not cached){}", stat.numChildren, blocks_dir_name, not_cached_blocks, " to clear old ones from ZooKeeper."); + LOG_TRACE(log, "Checking {} {} ({} are not cached){}, path is {}", stat.numChildren, blocks_dir_name, not_cached_blocks, " to clear old ones from ZooKeeper.", storage.zookeeper_path + "/" + blocks_dir_name); } std::vector exists_paths; From 97017068bba42b41fb69ca3da01bef91de64cfd4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 2 Jan 2023 14:52:46 +0100 Subject: [PATCH 8/8] Update 02515_cleanup_async_insert_block_ids.sh --- .../queries/0_stateless/02515_cleanup_async_insert_block_ids.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index bb35d2c18b5..9e22089d5e1 100755 --- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, long, no-fasttest +# Tags: zookeeper, no-parallel, long, no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh