From 0ad6205fa6bd5849dce3071235ba7def6a49233c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 15 Apr 2021 21:34:53 +0300 Subject: [PATCH] logs for debuging test failures with Replicated and Keeper --- src/Interpreters/DDLWorker.cpp | 13 ++++++++++++ .../ReplicatedMergeTreeCleanupThread.cpp | 13 +++++++++--- ...0953_zookeeper_suetin_deduplication_bug.sh | 8 +------- .../01305_replica_create_drop_zookeeper.sh | 20 +++++-------------- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 6081f06b25f..1c023f757f8 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -372,7 +372,20 @@ void DDLWorker::scheduleTasks(bool reinitialized) } Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, queue_updated_event); + size_t size_before_filtering = queue_nodes.size(); filterAndSortQueueNodes(queue_nodes); + /// The following message is too verbose, but it can be useful too debug mysterious test failures in CI + LOG_TRACE(log, "scheduleTasks: initialized={}, size_before_filtering={}, queue_size={}, " + "entries={}..{}, " + "first_failed_task_name={}, current_tasks_size={}," + "last_current_task={}," + "last_skipped_entry_name={}", + initialized, size_before_filtering, queue_nodes.size(), + queue_nodes.empty() ? "none" : queue_nodes.front(), queue_nodes.empty() ? "none" : queue_nodes.back(), + first_failed_task_name ? *first_failed_task_name : "none", current_tasks.size(), + current_tasks.empty() ? "none" : current_tasks.back()->entry_name, + last_skipped_entry_name ? *last_skipped_entry_name : "none"); + if (max_tasks_in_queue < queue_nodes.size()) cleanup_event->set(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 792a77d5e1a..502c6215a9a 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -342,6 +342,15 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime); auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold); + auto num_nodes_to_delete = timed_blocks.end() - first_outdated_block; + if (!num_nodes_to_delete) + return; + + auto last_outdated_block = timed_blocks.end() - 1; + LOG_TRACE(log, "Will clear {} old blocks from {} (ctime {}) to {} (ctime {})", num_nodes_to_delete, + first_outdated_block->node, first_outdated_block->ctime, + last_outdated_block->node, last_outdated_block->ctime); + zkutil::AsyncResponses try_remove_futures; for (auto it = first_outdated_block; it != timed_blocks.end(); ++it) { @@ -372,9 +381,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() first_outdated_block++; } - auto num_nodes_to_delete = timed_blocks.end() - first_outdated_block; - if (num_nodes_to_delete) - LOG_TRACE(log, "Cleared {} old blocks from ZooKeeper", num_nodes_to_delete); + LOG_TRACE(log, "Cleared {} old blocks from ZooKeeper", num_nodes_to_delete); } diff --git a/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh b/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh index baa2b0cf53f..71ca29bfd96 100755 --- a/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh +++ b/tests/queries/0_stateless/00953_zookeeper_suetin_deduplication_bug.sh @@ -21,15 +21,12 @@ ORDER BY (engine_id) SETTINGS replicated_deduplication_window = 2, cleanup_delay_period=4, cleanup_delay_period_random_add=0;" $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 1, 'hello')" -sleep 1 $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 2, 'hello')" -sleep 1 $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 3, 'hello')" $CLICKHOUSE_CLIENT --query="SELECT count(*) from elog" # 3 rows count=$($CLICKHOUSE_CLIENT --query="SELECT COUNT(*) FROM system.zookeeper where path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/elog/s1/blocks'") - while [[ $count != 2 ]] do sleep 1 @@ -39,9 +36,8 @@ done $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 1, 'hello')" $CLICKHOUSE_CLIENT --query="SELECT count(*) from elog" # 4 rows + count=$($CLICKHOUSE_CLIENT --query="SELECT COUNT(*) FROM system.zookeeper where path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/elog/s1/blocks'") - - while [[ $count != 2 ]] do sleep 1 @@ -53,12 +49,10 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 2, 'h $CLICKHOUSE_CLIENT --query="SELECT count(*) from elog" # 5 rows count=$($CLICKHOUSE_CLIENT --query="SELECT COUNT(*) FROM system.zookeeper where path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/elog/s1/blocks'") - while [[ $count != 2 ]] do sleep 1 count=$($CLICKHOUSE_CLIENT --query="SELECT COUNT(*) FROM system.zookeeper where path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/elog/s1/blocks'") - done $CLICKHOUSE_CLIENT --query="INSERT INTO elog VALUES (toDate('2018-10-01'), 2, 'hello')" diff --git a/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh b/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh index 01bb9af461c..e7b8091284a 100755 --- a/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh +++ b/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh @@ -8,21 +8,11 @@ set -e function thread() { - db_engine=`$CLICKHOUSE_CLIENT -q "SELECT engine FROM system.databases WHERE name='$CLICKHOUSE_DATABASE'"` - if [[ $db_engine == "Atomic" ]]; then - # Ignore "Replica already exists" exception - while true; do - $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS test_table_$1 NO DELAY; - CREATE TABLE test_table_$1 (a UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$1') ORDER BY tuple();" 2>&1 | - grep -vP '(^$)|(^Received exception from server)|(^\d+\. )|because the last replica of the table was dropped right now|is already started to be removing by another replica right now|is already finished removing by another replica right now|Removing leftovers from table|Another replica was suddenly created|was successfully removed from ZooKeeper|was created by another server at the same moment|was suddenly removed|some other replicas were created at the same time|already exists' - done - else - while true; do - $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS test_table_$1; - CREATE TABLE test_table_$1 (a UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$1') ORDER BY tuple();" 2>&1 | - grep -vP '(^$)|(^Received exception from server)|(^\d+\. )|because the last replica of the table was dropped right now|is already started to be removing by another replica right now|is already finished removing by another replica right now|Removing leftovers from table|Another replica was suddenly created|was successfully removed from ZooKeeper|was created by another server at the same moment|was suddenly removed|some other replicas were created at the same time' - done - fi + while true; do + $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS test_table_$1 SYNC; + CREATE TABLE test_table_$1 (a UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r_$1') ORDER BY tuple();" 2>&1 | + grep -vP '(^$)|(^Received exception from server)|(^\d+\. )|because the last replica of the table was dropped right now|is already started to be removing by another replica right now|is already finished removing by another replica right now|Removing leftovers from table|Another replica was suddenly created|was successfully removed from ZooKeeper|was created by another server at the same moment|was suddenly removed|some other replicas were created at the same time' + done }