From bc785bff25e17083f079518c2891842dc06552e9 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 30 Aug 2021 14:25:08 +0300 Subject: [PATCH 1/5] Fix sed argument in test/fuzzer/run-fuzzer.sh --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 9a389edc5b2..c62851ee38c 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -76,7 +76,7 @@ function filter_exists_and_template local path for path in "$@"; do if [ -e "$path" ]; then - echo "$path" | sed -n 's/\.sql\.j2$/.gen.sql/' + echo "$path" | sed 's/\.sql\.j2$/.gen.sql/' else echo "'$path' does not exists" >&2 fi From 887ac96a1931031fe615d0999699c27752c6d304 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 30 Aug 2021 14:27:01 +0300 Subject: [PATCH 2/5] Whitespace in 0_stateless/01015_empty_in_inner_right_join.sql.j2 --- .../01015_empty_in_inner_right_join.sql.j2 | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql.j2 b/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql.j2 index 6c13654598e..cdb9d253b9b 100644 --- a/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql.j2 +++ b/tests/queries/0_stateless/01015_empty_in_inner_right_join.sql.j2 @@ -4,25 +4,25 @@ SET joined_subquery_requires_alias = 0; SET join_algorithm = '{{ join_algorithm }}'; -SELECT 'IN empty set',count() FROM system.numbers WHERE number IN (SELECT toUInt64(1) WHERE 0); -SELECT 'IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); -SELECT 'NOT IN empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number NOT IN (SELECT toUInt64(1) WHERE 0); +SELECT 'IN empty set', count() FROM system.numbers WHERE number IN (SELECT toUInt64(1) WHERE 0); +SELECT 'IN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); +SELECT 'NOT IN empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number NOT IN (SELECT toUInt64(1) WHERE 0); -SELECT 'INNER JOIN empty set',count() FROM system.numbers INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON system.numbers.number = x; -SELECT 'INNER JOIN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; +SELECT 'INNER JOIN empty set', count() FROM system.numbers INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON system.numbers.number = x; +SELECT 'INNER JOIN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; -SELECT 'RIGHT JOIN empty set',count() FROM system.numbers RIGHT JOIN (SELECT toUInt64(1) AS x WHERE 0) ON system.numbers.number = x; -SELECT 'RIGHT JOIN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 RIGHT JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; +SELECT 'RIGHT JOIN empty set', count() FROM system.numbers RIGHT JOIN (SELECT toUInt64(1) AS x WHERE 0) ON system.numbers.number = x; +SELECT 'RIGHT JOIN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 RIGHT JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; -SELECT 'LEFT JOIN empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 LEFT JOIN (SELECT toUInt64(1) AS x WHERE 0) ON t1.number = x; -SELECT 'LEFT JOIN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 LEFT JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; +SELECT 'LEFT JOIN empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 LEFT JOIN (SELECT toUInt64(1) AS x WHERE 0) ON t1.number = x; +SELECT 'LEFT JOIN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 LEFT JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x; -SELECT 'multiple sets IN empty set OR IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number IN (SELECT toUInt64(1) WHERE 0) OR number IN (SELECT toUInt64(1) WHERE 1); -SELECT 'multiple sets IN empty set OR NOT IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number IN (SELECT toUInt64(1) WHERE 0) OR number NOT IN (SELECT toUInt64(1) WHERE 1); -SELECT 'multiple sets NOT IN empty set AND IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number NOT IN (SELECT toUInt64(1) WHERE 0) AND number IN (SELECT toUInt64(1) WHERE 1); -SELECT 'multiple sets INNER JOIN empty set AND IN empty set',count() FROM system.numbers INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON system.numbers.number = x WHERE system.numbers.number IN (SELECT toUInt64(1) WHERE 0); -SELECT 'multiple sets INNER JOIN empty set AND IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON t1.number = x WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); -SELECT 'multiple sets INNER JOIN non-empty set AND IN non-empty set',count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); +SELECT 'multiple sets IN empty set OR IN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number IN (SELECT toUInt64(1) WHERE 0) OR number IN (SELECT toUInt64(1) WHERE 1); +SELECT 'multiple sets IN empty set OR NOT IN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number IN (SELECT toUInt64(1) WHERE 0) OR number NOT IN (SELECT toUInt64(1) WHERE 1); +SELECT 'multiple sets NOT IN empty set AND IN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) WHERE number NOT IN (SELECT toUInt64(1) WHERE 0) AND number IN (SELECT toUInt64(1) WHERE 1); +SELECT 'multiple sets INNER JOIN empty set AND IN empty set', count() FROM system.numbers INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON system.numbers.number = x WHERE system.numbers.number IN (SELECT toUInt64(1) WHERE 0); +SELECT 'multiple sets INNER JOIN empty set AND IN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 0) ON t1.number = x WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); +SELECT 'multiple sets INNER JOIN non-empty set AND IN non-empty set', count() FROM (SELECT number FROM system.numbers LIMIT 10) t1 INNER JOIN (SELECT toUInt64(1) AS x WHERE 1) ON t1.number = x WHERE t1.number IN (SELECT toUInt64(1) WHERE 1); SELECT 'IN empty set equals 0', count() FROM numbers(10) WHERE (number IN (SELECT toUInt64(1) WHERE 0)) = 0; SELECT 'IN empty set sum if', sum(if(number IN (SELECT toUInt64(1) WHERE 0), 2, 1)) FROM numbers(10); From ab45c412d0027f286966d5970c0e3510f1c8a6b1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 30 Aug 2021 15:45:32 +0300 Subject: [PATCH 3/5] Another try to fix BackgroundPoolTask decrement. --- .../MergeTree/BackgroundJobsExecutor.cpp | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp index f3d957117e8..7c784de9ebb 100644 --- a/src/Storages/MergeTree/BackgroundJobsExecutor.cpp +++ b/src/Storages/MergeTree/BackgroundJobsExecutor.cpp @@ -84,28 +84,55 @@ bool incrementMetricIfLessThanMax(std::atomic & atomic_value, Int64 max_v } +/// This is a RAII class which only decrements metric. +/// It is added because after all other fixes a bug non-executing merges was occurred again. +/// Last hypothesis: task was successfully added to pool, however, was not executed because of internal exception in it. +class ParanoidMetricDecrementor +{ +public: + explicit ParanoidMetricDecrementor(CurrentMetrics::Metric metric_) : metric(metric_) {} + void alarm() { is_alarmed = true; } + void decrement() + { + if (is_alarmed.exchange(false)) + { + CurrentMetrics::values[metric]--; + } + } + + ~ParanoidMetricDecrementor() { decrement(); } + +private: + + CurrentMetrics::Metric metric; + std::atomic_bool is_alarmed = false; +}; + void IBackgroundJobExecutor::execute(JobAndPool job_and_pool) try { auto & pool_config = pools_configs[job_and_pool.pool_type]; const auto max_pool_size = pool_config.get_max_pool_size(); + auto metric_decrementor = std::make_shared(pool_config.tasks_metric); + /// If corresponding pool is not full increment metric and assign new job if (incrementMetricIfLessThanMax(CurrentMetrics::values[pool_config.tasks_metric], max_pool_size)) { + metric_decrementor->alarm(); try /// this try required because we have to manually decrement metric { /// Synchronize pool size, because config could be reloaded pools[job_and_pool.pool_type].setMaxThreads(max_pool_size); pools[job_and_pool.pool_type].setQueueSize(max_pool_size); - pools[job_and_pool.pool_type].scheduleOrThrowOnError([this, pool_config, job{std::move(job_and_pool.job)}] () + pools[job_and_pool.pool_type].scheduleOrThrowOnError([this, metric_decrementor, job{std::move(job_and_pool.job)}] () { try /// We don't want exceptions in background pool { bool job_success = job(); /// Job done, decrement metric and reset no_work counter - CurrentMetrics::values[pool_config.tasks_metric]--; + metric_decrementor->decrement(); if (job_success) { @@ -121,7 +148,7 @@ try } catch (...) { - CurrentMetrics::values[pool_config.tasks_metric]--; + metric_decrementor->decrement(); tryLogCurrentException(__PRETTY_FUNCTION__); scheduleTask(/* with_backoff = */ true); } @@ -133,7 +160,7 @@ try catch (...) { /// With our Pool settings scheduleOrThrowOnError shouldn't throw exceptions, but for safety catch added here - CurrentMetrics::values[pool_config.tasks_metric]--; + metric_decrementor->decrement(); tryLogCurrentException(__PRETTY_FUNCTION__); scheduleTask(/* with_backoff = */ true); } From 0828548b88cc8b28374e0ae578d545b0f4ec86c5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 29 Aug 2021 13:54:40 +0300 Subject: [PATCH 4/5] Fix intersecting parts due to new part had been replaced with an empty part AFAICS the problem is that some parts may be replaced with empty parts (after #25820), and removed by the cleanup thread, due to it is empty [1] (while it should not be deleted since it can download source part):
``` 2021.08.18 20:11:22.687933 [ 341 ] {} test_dpefxp.alter_table_1 (0758ca24-90e7-452c-8758-ca2490e7252c): Created log entry for mutation -1_115_115_0_146 ... 2021.08.18 20:11:22.707609 [ 22825 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Executing log entry to mutate part -1_115_115_0 to -1_115_115_0_146 2021.08.18 20:11:22.707643 [ 22825 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Source part -1_115_115_0 for -1_115_115_0_146 is not ready; will try to fetch it instead ... 2021.08.18 20:11:22.709397 [ 333 ] {} test_dpefxp.alter_table_6 (ReplicatedMergeTreeQueue): Not executing log entry queue-0000001579 for part -1_115_115_0 because it is covered by part -1_115_115_0_146 that is currently executing. 2021.08.18 20:11:22.718861 [ 22825 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): DB::Exception: No active replica has part -1_115_115_0_146 or covering part ... 2021.08.18 20:11:27.936829 [ 295 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Going to replace lost part -1_115_115_0_146 with empty part 2021.08.18 20:11:27.957839 [ 295 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Created empty part -1_115_115_0_146 instead of lost part ... 2021.08.18 20:11:28.731635 [ 257 ] {} test_dpefxp.alter_table_6 (ReplicatedMergeTreeCleanupThread): Cleared 190 old blocks from ZooKeeper ... 2021.08.18 20:11:28.734507 [ 257 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Will try to insert a log entry to DROP_RANGE for part: -1_115_115_0_146 2021.08.18 20:11:28.779373 [ 22837 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Removed 1 parts inside -1_115_115_0_146. ... 2021.08.18 20:11:28.792600 [ 273 ] {} test_dpefxp.alter_table_6 (766ae414-e113-4965-b66a-e414e1137965): Created log entry /clickhouse/tables/00993_system_parts_race_condition_drop_zookeeper_test_dpefxp/alter_table/log/log-0000003459 for merge -1_111_118_1 ... 2021.08.18 20:11:28.910988 [ 354 ] {} test_dpefxp.alter_table_7 (ReplicatedMergeTreeQueue): Code: 49. DB::Exception: Part -1_111_118_1 intersects next part -1_115_115_0_146. It is a bug. (LOGICAL_ERROR), Stack trace (when copying this message, always include the lines below): 2021.08.18 20:11:31.282160 [ 305 ] {} test_dpefxp.alter_table_2 (ReplicatedMergeTreeQueue): Code: 49. DB::Exception: Part -1_111_118_1 intersects next part -1_115_115_0_146. It is a bug. (LOGICAL_ERROR), Stack trace (when copying this message, always include the lines below): ```
[1]: https://clickhouse-test-reports.s3.yandex.net/27752/59e3cb18f4e53c453951267b5599afeb664290d8/functional_stateless_tests_(release,_wide_parts_enabled).html --- src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 797d0570fbc..0efa83237ca 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -192,7 +192,7 @@ void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible( if (missing_part_search_result == MissingPartSearchResult::LostForever) { auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); - if (lost_part_info.level != 0) + if (lost_part_info.level != 0 || lost_part_info.mutation != 0) { Strings source_parts; bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); From 4cbc1aba199b607aaa38ee74da7861b89bb37abc Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Mon, 30 Aug 2021 16:22:36 +0300 Subject: [PATCH 5/5] Disable SC2001 shellcheck in docker/test/fuzzer/run-fuzzer.sh --- docker/test/fuzzer/run-fuzzer.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index c62851ee38c..603c35ede54 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# shellcheck disable=SC2086 +# shellcheck disable=SC2086,SC2001 set -eux set -o pipefail @@ -76,6 +76,9 @@ function filter_exists_and_template local path for path in "$@"; do if [ -e "$path" ]; then + # SC2001 shellcheck suggests: + # echo ${path//.sql.j2/.gen.sql} + # but it doesn't allow to use regex echo "$path" | sed 's/\.sql\.j2$/.gen.sql/' else echo "'$path' does not exists" >&2