From fc223aa6731e15928e94a17f96b87644dc4017ee Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 25 Oct 2022 23:52:31 +0000 Subject: [PATCH 01/28] replace throttler algorithm by token bucket --- src/Common/Throttler.cpp | 80 ++++++++++++++++++---------------------- src/Common/Throttler.h | 37 ++++++++++--------- 2 files changed, 55 insertions(+), 62 deletions(-) diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index 2c9279e21e1..5a0f7e227a6 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -3,7 +3,6 @@ #include #include #include -#include namespace ProfileEvents { @@ -21,63 +20,56 @@ namespace ErrorCodes /// Just 10^9. static constexpr auto NS = 1000000000UL; -/// Tracking window. Actually the size is not really important. We just want to avoid -/// throttles when there are no actions for a long period time. -static const double window_ns = 1ULL * NS; +static const size_t default_burst_seconds = 10; + +Throttler::Throttler(size_t max_speed_, const std::shared_ptr & parent_) + : max_speed(max_speed_) + , max_burst(max_speed_ * default_burst_seconds) + , limit_exceeded_exception_message("") + , tokens(max_burst) + , parent(parent_) +{} + +Throttler::Throttler(size_t max_speed_, size_t limit_, const char * limit_exceeded_exception_message_, + const std::shared_ptr & parent_) + : max_speed(max_speed_) + , max_burst(max_speed_ * default_burst_seconds) + , limit(limit_) + , limit_exceeded_exception_message(limit_exceeded_exception_message_) + , tokens(max_burst) + , parent(parent_) +{} void Throttler::add(size_t amount) { - size_t new_count; - /// This outer variable is always equal to smoothed_speed. - /// We use to avoid race condition. - double current_speed = 0; - + // Values obtained under lock to be checked after release + size_t count_value; + double tokens_value; { std::lock_guard lock(mutex); - auto now = clock_gettime_ns_adjusted(prev_ns); - /// If prev_ns is equal to zero (first `add` call) we known nothing about speed - /// and don't track anything. if (max_speed && prev_ns != 0) { - /// Time spent to process the amount of bytes - double time_spent = now - prev_ns; - - /// The speed in bytes per second is equal to amount / time_spent in seconds - auto new_speed = amount / (time_spent / NS); - - /// We want to make old values of speed less important for our smoothed value - /// so we decay it's value with coef. 
-        auto decay_coeff = std::pow(0.5, time_spent / window_ns);
-
-        /// Weighted average between previous and new speed
-        smoothed_speed = smoothed_speed * decay_coeff + (1 - decay_coeff) * new_speed;
-        current_speed = smoothed_speed;
+            double delta_seconds = static_cast<double>(now - prev_ns) / NS;
+            tokens = std::max(tokens + max_speed * delta_seconds - amount, max_burst);
         }
 
-        count += amount;
-        new_count = count;
+        count_value = count;
+        tokens_value = tokens;
         prev_ns = now;
     }
 
-    if (limit && new_count > limit)
+    if (limit && count_value > limit)
         throw Exception(limit_exceeded_exception_message + std::string(" Maximum: ") + toString(limit), ErrorCodes::LIMIT_EXCEEDED);
 
-    if (max_speed && current_speed > max_speed)
+    /// Wait unless there is positive amount of tokens - throttling
+    if (max_speed && tokens_value < 0)
     {
-        /// If we was too fast then we have to sleep until our smoothed speed became <= max_speed
-        int64_t sleep_time = static_cast<int64_t>(-window_ns * std::log2(max_speed / current_speed));
-
-        if (sleep_time > 0)
-        {
-            accumulated_sleep += sleep_time;
-
-            sleepForNanoseconds(sleep_time);
-
-            accumulated_sleep -= sleep_time;
-
-            ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_time / 1000UL);
-        }
+        int64_t sleep_time = static_cast<int64_t>(-tokens_value * max_speed * NS);
+        accumulated_sleep += sleep_time;
+        sleepForNanoseconds(sleep_time);
+        accumulated_sleep -= sleep_time;
+        ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_time / 1000UL);
     }
 
     if (parent)
@@ -89,9 +81,9 @@ void Throttler::reset()
     std::lock_guard lock(mutex);
 
     count = 0;
-    accumulated_sleep = 0;
-    smoothed_speed = 0;
+    tokens = max_burst;
     prev_ns = 0;
+    // NOTE: do not zero `accumulated_sleep` to avoid races
 }
 
 bool Throttler::isThrottling() const
diff --git a/src/Common/Throttler.h b/src/Common/Throttler.h
index 6d44ad6ca5f..a33637783e7 100644
--- a/src/Common/Throttler.h
+++ b/src/Common/Throttler.h
@@ -10,24 +10,26 @@
 namespace DB
 {
 
-/** Allows you to limit the speed of something (in entities per second) using sleep.
- *  Specifics of work:
- *  Tracks exponentially (pow of 1/2) smoothed speed with hardcoded window.
- *  See more comments in .cpp file.
- *
- *  Also allows you to set a limit on the maximum number of entities. If exceeded, an exception will be thrown.
+/** Allows you to limit the speed of something (in tokens per second) using sleep.
+ *  Implemented using Token Bucket Throttling algorithm.
+ *  Also allows you to set a limit on the maximum number of tokens. If exceeded, an exception will be thrown.
*/ class Throttler { public: - explicit Throttler(size_t max_speed_, const std::shared_ptr & parent_ = nullptr) - : max_speed(max_speed_), limit_exceeded_exception_message(""), parent(parent_) {} + Throttler(size_t max_speed_, size_t max_burst_, const std::shared_ptr & parent_ = nullptr) + : max_speed(max_speed_), max_burst(max_burst_), limit_exceeded_exception_message(""), tokens(max_burst), parent(parent_) {} + + explicit Throttler(size_t max_speed_, const std::shared_ptr & parent_ = nullptr); + + Throttler(size_t max_speed_, size_t max_burst_, size_t limit_, const char * limit_exceeded_exception_message_, + const std::shared_ptr & parent_ = nullptr) + : max_speed(max_speed_), max_burst(max_burst_), limit(limit_), limit_exceeded_exception_message(limit_exceeded_exception_message_), tokens(max_burst), parent(parent_) {} Throttler(size_t max_speed_, size_t limit_, const char * limit_exceeded_exception_message_, - const std::shared_ptr & parent_ = nullptr) - : max_speed(max_speed_), limit(limit_), limit_exceeded_exception_message(limit_exceeded_exception_message_), parent(parent_) {} + const std::shared_ptr & parent_ = nullptr); - /// Calculates the smoothed speed, sleeps if required and throws exception on + /// Use `amount` ent, sleeps if required and throws exception on /// limit overflow. void add(size_t amount); @@ -45,15 +47,14 @@ public: private: size_t count{0}; - const size_t max_speed{0}; - const uint64_t limit{0}; /// 0 - not limited. + const size_t max_speed{0}; /// in tokens per second. + const size_t max_burst{0}; /// in tokens. + const uint64_t limit{0}; /// 0 - not limited. const char * limit_exceeded_exception_message = nullptr; std::mutex mutex; - std::atomic accumulated_sleep{0}; - /// Smoothed value of current speed. Updated in `add` method. - double smoothed_speed{0}; - /// previous `add` call time (in nanoseconds) - uint64_t prev_ns{0}; + std::atomic accumulated_sleep{0}; // Accumulated sleep time over all waiting threads + double tokens{0}; /// Amount of tokens available in token bucket. Updated in `add` method. + uint64_t prev_ns{0}; /// Previous `add` call time (in nanoseconds). /// Used to implement a hierarchy of throttlers std::shared_ptr parent; From 27599ab70cf9c3b5b19efe497a554b1266b6210e Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 25 Oct 2022 23:58:25 +0000 Subject: [PATCH 02/28] fix comment --- src/Common/Throttler.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Common/Throttler.h b/src/Common/Throttler.h index a33637783e7..9b6eff13506 100644 --- a/src/Common/Throttler.h +++ b/src/Common/Throttler.h @@ -29,8 +29,7 @@ public: Throttler(size_t max_speed_, size_t limit_, const char * limit_exceeded_exception_message_, const std::shared_ptr & parent_ = nullptr); - /// Use `amount` ent, sleeps if required and throws exception on - /// limit overflow. + /// Use `amount` tokens, sleeps if required or throws exception on limit overflow. 
    void add(size_t amount);
 
From 471b391ab2b92ebd18ca019ea00dca2aa6f99c1c Mon Sep 17 00:00:00 2001
From: serxa
Date: Wed, 26 Oct 2022 00:00:40 +0000
Subject: [PATCH 03/28] fix

---
 src/Common/Throttler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp
index 5a0f7e227a6..a3ae966c49b 100644
--- a/src/Common/Throttler.cpp
+++ b/src/Common/Throttler.cpp
@@ -51,7 +51,7 @@ void Throttler::add(size_t amount)
         if (max_speed && prev_ns != 0)
         {
             double delta_seconds = static_cast<double>(now - prev_ns) / NS;
-            tokens = std::max(tokens + max_speed * delta_seconds - amount, max_burst);
+            tokens = std::min(tokens + max_speed * delta_seconds - amount, max_burst);
         }
         count += amount;
         count_value = count;
 
From f5f6f1b5933b4d19702f9fabf63fbb4dc02f21a6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Oct 2022 13:39:41 +0200
Subject: [PATCH 04/28] Add profile events for jemalloc purge

Signed-off-by: Azat Khuzhin
---
 src/Common/MemoryTracker.cpp | 6 ++++++
 src/Common/ProfileEvents.cpp | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 8bd31681706..41634e8f561 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "config.h"
@@ -86,6 +87,8 @@ inline std::string_view toDescription(OvercommitResult result)
 namespace ProfileEvents
 {
     extern const Event QueryMemoryLimitExceeded;
+    extern const Event MemoryAllocatorPurge;
+    extern const Event MemoryAllocatorPurgeTimeMicroseconds;
 }
 
 using namespace std::chrono_literals;
@@ -229,7 +232,10 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
     {
         if (free_memory_in_allocator_arenas.exchange(-current_free_memory_in_allocator_arenas) > 0)
         {
+            Stopwatch watch;
             mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0);
+            ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge);
+            ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds());
         }
     }
 
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 46bec669626..2f801e496fa 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -229,6 +229,8 @@ The server successfully detected this situation and will download merged part fr
     M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user space. This include time CPU pipeline was stalled due to cache misses, branch mispredictions, hyper-threading, etc.") \
     M(SystemTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in OS kernel space. This include time CPU pipeline was stalled due to cache misses, branch mispredictions, hyper-threading, etc.") \
     M(MemoryOvercommitWaitTimeMicroseconds, "Total time spent in waiting for memory to be freed in OvercommitTracker.") \
+    M(MemoryAllocatorPurge, "Total number of times memory allocator purge was requested") \
+    M(MemoryAllocatorPurgeTimeMicroseconds, "Total time spent for memory allocator purge") \
    M(SoftPageFaults, "The number of soft page faults in query execution threads. Soft page fault usually means a miss in the memory allocator cache which required a new memory mapping from the OS and subsequent allocation of a page of physical memory.") \
    M(HardPageFaults, "The number of hard page faults in query execution threads. High values indicate either that you forgot to turn off swap on your server, or eviction of memory pages of the ClickHouse binary during very high memory pressure, or successful usage of the 'mmap' read method for the tables data.") \
     \
 
From 2faefc0c0973b666397b6ac4a96c6732706e250b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Oct 2022 13:40:37 +0200
Subject: [PATCH 05/28] Add amount of memory used by allocator into logs

Signed-off-by: Azat Khuzhin
---
 src/Interpreters/AsynchronousMetrics.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp
index 338ae1bbbfd..2e43a594960 100644
--- a/src/Interpreters/AsynchronousMetrics.cpp
+++ b/src/Interpreters/AsynchronousMetrics.cpp
@@ -713,9 +713,10 @@ void AsynchronousMetrics::update(TimePoint update_time)
         /// Log only if difference is high. This is for convenience. The threshold is arbitrary.
         if (difference >= 1048576 || difference <= -1048576)
             LOG_TRACE(log,
-                "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}",
+                "MemoryTracking: was {}, peak {}, free memory in arenas {}, will set to {} (RSS), difference: {}",
                 ReadableSize(amount),
                 ReadableSize(peak),
+                ReadableSize(free_memory_in_allocator_arenas),
                 ReadableSize(rss),
                 ReadableSize(difference));
 
From 7b69a70e828a6fab644086aa589d4352c5d86883 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Oct 2022 18:39:36 +0200
Subject: [PATCH 06/28] Fix frequent memory drift message and clarify things in
 comments

Something like:

    2022.09.28 06:33:34.001433 [ 3133669 ] {} AsynchronousMetrics: MemoryTracking: was 562.20 MiB, peak 562.21 MiB, will set to 562.20 MiB (RSS), difference: -70.46 MiB
    2022.09.28 06:33:35.001639 [ 3133669 ] {} AsynchronousMetrics: MemoryTracking: was 562.20 MiB, peak 562.21 MiB, will set to 562.20 MiB (RSS), difference: -70.45 MiB

Signed-off-by: Azat Khuzhin
---
 src/Common/MemoryTracker.cpp             |  2 +-
 src/Interpreters/AsynchronousMetrics.cpp | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 41634e8f561..b530410ec63 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -438,7 +438,7 @@ void MemoryTracker::reset()
 
 void MemoryTracker::setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_)
 {
-    Int64 new_amount = rss_; // - free_memory_in_allocator_arenas_;
+    Int64 new_amount = rss_;
     total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed);
     free_memory_in_allocator_arenas.store(free_memory_in_allocator_arenas_, std::memory_order_relaxed);
 
diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp
index 2e43a594960..488ac77e956 100644
--- a/src/Interpreters/AsynchronousMetrics.cpp
+++ b/src/Interpreters/AsynchronousMetrics.cpp
@@ -703,12 +703,18 @@ void AsynchronousMetrics::update(TimePoint update_time)
 
         Int64 free_memory_in_allocator_arenas = 0;
 #if USE_JEMALLOC
-        /// This is a memory which is kept by allocator.
-        /// Will subsract it from RSS to decrease memory drift.
+        /// According to jemalloc man, pdirty is:
+        ///
+        /// Number of pages within unused extents that are potentially
+        /// dirty, and for which madvise() or similar has not been called.
+        ///
+        /// So they will be subtracted from RSS to make accounting more
+        /// accurate, since those pages are not really RSS but memory
+        /// that can be used at any time via jemalloc.
         free_memory_in_allocator_arenas = je_malloc_pdirty * getPageSize();
 #endif
 
-        Int64 difference = rss - free_memory_in_allocator_arenas - amount;
+        Int64 difference = rss - amount;
 
         /// Log only if difference is high. This is for convenience. The threshold is arbitrary.
         if (difference >= 1048576 || difference <= -1048576)
 
From ec1389cbe7514b0ae4c00b8de228fee682cb573a Mon Sep 17 00:00:00 2001
From: Sergei Trifonov
Date: Thu, 27 Oct 2022 10:30:00 +0200
Subject: [PATCH 07/28] Update src/Common/Throttler.cpp

Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
---
 src/Common/Throttler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp
index a3ae966c49b..e0f2b63e8ae 100644
--- a/src/Common/Throttler.cpp
+++ b/src/Common/Throttler.cpp
@@ -65,7 +65,7 @@ void Throttler::add(size_t amount)
     /// Wait unless there is positive amount of tokens - throttling
     if (max_speed && tokens_value < 0)
     {
-        int64_t sleep_time = static_cast<int64_t>(-tokens_value * max_speed * NS);
+        int64_t sleep_time = static_cast<int64_t>(-tokens_value / max_speed * NS);
         accumulated_sleep += sleep_time;
         sleepForNanoseconds(sleep_time);
         accumulated_sleep -= sleep_time;
 
From accf78f1fffae8b04ec14f0586e7038f94e59d51 Mon Sep 17 00:00:00 2001
From: serxa
Date: Thu, 27 Oct 2022 08:39:08 +0000
Subject: [PATCH 08/28] fix the first add() call

---
 src/Common/Throttler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp
index e0f2b63e8ae..169262d30c7 100644
--- a/src/Common/Throttler.cpp
+++ b/src/Common/Throttler.cpp
@@ -48,9 +48,9 @@ void Throttler::add(size_t amount)
     {
         std::lock_guard lock(mutex);
         auto now = clock_gettime_ns_adjusted(prev_ns);
-        if (max_speed && prev_ns != 0)
+        if (max_speed)
         {
-            double delta_seconds = static_cast<double>(now - prev_ns) / NS;
+            double delta_seconds = prev_ns ? static_cast<double>(now - prev_ns) / NS : 0;
             tokens = std::min(tokens + max_speed * delta_seconds - amount, max_burst);
         }
         count += amount;
 
From c4f25228f73a86a626e5491ba7c7b6057ee4d3ed Mon Sep 17 00:00:00 2001
From: serxa
Date: Thu, 27 Oct 2022 17:18:50 +0000
Subject: [PATCH 09/28] smaller burst to be closer to old behaviour

---
 src/Common/Throttler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp
index 169262d30c7..b38777efc03 100644
--- a/src/Common/Throttler.cpp
+++ b/src/Common/Throttler.cpp
@@ -20,7 +20,7 @@ namespace ErrorCodes
 
 /// Just 10^9.
static constexpr auto NS = 1000000000UL; -static const size_t default_burst_seconds = 10; +static const size_t default_burst_seconds = 1; Throttler::Throttler(size_t max_speed_, const std::shared_ptr & parent_) : max_speed(max_speed_) From 2038db21f485d8b3de230c58e30429012c4d2b1a Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 27 Oct 2022 17:26:12 +0000 Subject: [PATCH 10/28] lower limit to avoid hitting another bottleneck --- .../0_stateless/01288_shard_max_network_bandwidth.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql b/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql index 969bb0a126c..d2daf48a1cb 100644 --- a/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql +++ b/tests/queries/0_stateless/01288_shard_max_network_bandwidth.sql @@ -1,7 +1,7 @@ -- Tags: shard --- Limit to 10 MB/sec -SET max_network_bandwidth = 10000000; +-- Limit to 100 KB/sec +SET max_network_bandwidth = 100000; -- Lower max_block_size, so we can start throttling sooner. Otherwise query will be executed too quickly. SET max_block_size = 100; @@ -11,7 +11,7 @@ CREATE TEMPORARY TABLE times (t DateTime); -- rand64 is uncompressable data. Each number will take 8 bytes of bandwidth. -- This query should execute in no less than 1.6 seconds if throttled. INSERT INTO times SELECT now(); -SELECT sum(ignore(*)) FROM (SELECT rand64() FROM remote('127.0.0.{2,3}', numbers(2000000))); +SELECT sum(ignore(*)) FROM (SELECT rand64() FROM remote('127.0.0.{2,3}', numbers(20000))); INSERT INTO times SELECT now(); SELECT max(t) - min(t) >= 1 FROM times; From acec4475261df88a9139f2743f4034b6d33b9554 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Fri, 28 Oct 2022 12:35:18 +0200 Subject: [PATCH 11/28] Fix anchor links --- docs/en/sql-reference/statements/alter/partition.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index da99c52538f..2d89c1d5d18 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -39,7 +39,7 @@ ALTER TABLE mt DETACH PARTITION '2020-11-21'; ALTER TABLE mt DETACH PART 'all_2_2_0'; ``` -Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). +Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression). After the query is executed, you can do whatever you want with the data in the `detached` directory — delete it from the file system, or just leave it. @@ -53,7 +53,7 @@ ALTER TABLE table_name [ON CLUSTER cluster] DROP PARTITION|PART partition_expr Deletes the specified partition from the table. This query tags the partition as inactive and deletes data completely, approximately in 10 minutes. -Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr). +Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression). The query is replicated – it deletes data on all replicas. @@ -71,7 +71,7 @@ ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partiti ``` Removes the specified part or all parts of the specified partition from `detached`. 
-Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Read more about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).
 
 ## ATTACH PARTITION\|PART
 
@@ -86,7 +86,7 @@ ALTER TABLE visits ATTACH PARTITION 201901;
 ALTER TABLE visits ATTACH PART 201901_2_2_0;
 ```
 
-Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Read more about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).
 
 This query is replicated. The replica-initiator checks whether there is data in the `detached` directory. If data exists, the query checks its integrity. If everything is correct, the query adds the data to the table.
 
@@ -166,7 +166,7 @@ This query creates a local backup of a specified partition. If the `PARTITION` c
 The entire backup process is performed without stopping the server.
 :::
 
-Note that for old-styled tables you can specify the prefix of the partition name (for example, `2019`) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
+Note that for old-styled tables you can specify the prefix of the partition name (for example, `2019`) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).
 
 At the time of execution, for a data snapshot, the query creates hardlinks to a table data. Hardlinks are placed in the directory `/var/lib/clickhouse/shadow/N/...`, where:
 
From ca5dbe88dbbde7bc139b6bfc864a8a1b52167ca7 Mon Sep 17 00:00:00 2001
From: clarkcaoliu
Date: Wed, 19 Oct 2022 03:18:12 +0800
Subject: [PATCH 12/28] match function can use index if prefix

---
 src/Storages/MergeTree/KeyCondition.cpp       | 105 +++++++++++++++++-
 .../02462_match_regexp_pk.reference           |   5 +
 .../0_stateless/02462_match_regexp_pk.sql     |   9 ++
 3 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02462_match_regexp_pk.reference
 create mode 100644 tests/queries/0_stateless/02462_match_regexp_pk.sql

diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 927ca98cc9a..8fc5f13d27d 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -86,6 +86,88 @@ String extractFixedPrefixFromLikePattern(const String & like_pattern)
     return fixed_prefix;
 }
 
+/// For a regexp like "^prefix..." it returns "prefix"
+static String extractFixedPrefixFromRegularExpression(const String & regexp)
+{
+    if (regexp.size() <= 1 || regexp[0] != '^')
+        return {};
+
+    String fixed_prefix;
+    const char * begin = regexp.data() + 1;
+    const char * pos = begin;
+    const char * end = regexp.data() + regexp.size();
+
+    while (pos != end)
+    {
+        switch (*pos)
+        {
+            case '\0':
+                pos = end;
+                break;
+
+            case '\\':
+            {
+                ++pos;
+                if (pos == end)
+                    break;
+
+                switch (*pos)
+                {
+                    case '|':
+                    case '(':
+                    case ')':
+                    case '^':
+                    case '$':
+                    case '.':
+                    case '[':
+                    case '?':
+                    case '*':
+                    case '+':
+                    case '{':
+                        fixed_prefix += *pos;
+                        break;
+                    default:
+                        /// all other escape sequences are not supported
+                        pos = end;
+                        break;
+                }
+
+                ++pos;
+                break;
+            }
+
+            /// non-trivial cases
+            case '|':
+                fixed_prefix.clear();
+                [[fallthrough]];
+            case '(':
+            case '[':
+            case '^':
+            case '$':
+            case '.':
+            case '+':
+                pos = end;
+                break;
+
+            /// Quantifiers that allow a zero number of occurrences.
+            case '{':
+            case '?':
+            case '*':
+                if (!fixed_prefix.empty())
+                    fixed_prefix.pop_back();
+
+                pos = end;
+                break;
+            default:
+                fixed_prefix += *pos;
+                pos++;
+                break;
+        }
+    }
+
+    return fixed_prefix;
+}
 
 /** For a given string, get a minimum string that is strictly greater than all strings with this prefix,
   * or return an empty string if there are no such strings.
@@ -581,6 +663,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map
             return true;
         }
     },
+    {
+        "match",
+        [] (RPNElement & out, const Field & value)
+        {
+            if (value.getType() != Field::Types::String)
+                return false;
+
+            String prefix = extractFixedPrefixFromRegularExpression(value.get<const String &>());
+            if (prefix.empty())
+                return false;
+
+            String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
+
+            out.function = RPNElement::FUNCTION_IN_RANGE;
+            out.range = !right_bound.empty() ?
Range(prefix, true, right_bound, false) + : Range::createLeftBounded(prefix, true); + + return true; + } + }, { "isNotNull", [] (RPNElement & out, const Field &) @@ -1738,7 +1841,7 @@ bool KeyCondition::tryParseAtomFromAST(const Tree & node, ContextPtr context, Bl else if (func_name == "in" || func_name == "notIn" || func_name == "like" || func_name == "notLike" || func_name == "ilike" || func_name == "notIlike" || - func_name == "startsWith") + func_name == "startsWith" || func_name == "match") { /// "const IN data_column" doesn't make sense (unlike "data_column IN const") return false; diff --git a/tests/queries/0_stateless/02462_match_regexp_pk.reference b/tests/queries/0_stateless/02462_match_regexp_pk.reference new file mode 100644 index 00000000000..428d6556f4c --- /dev/null +++ b/tests/queries/0_stateless/02462_match_regexp_pk.reference @@ -0,0 +1,5 @@ +4 +1 +3 +4 +4 diff --git a/tests/queries/0_stateless/02462_match_regexp_pk.sql b/tests/queries/0_stateless/02462_match_regexp_pk.sql new file mode 100644 index 00000000000..1a944b96196 --- /dev/null +++ b/tests/queries/0_stateless/02462_match_regexp_pk.sql @@ -0,0 +1,9 @@ +CREATE TABLE mt_match_pk (v String) ENGINE = MergeTree ORDER BY v SETTINGS index_granularity = 1; +INSERT INTO mt_match_pk VALUES ('a'), ('aaa'), ('aba'), ('bac'), ('acccca'); + +SET force_primary_key = 1; +SELECT count() FROM mt_match_pk WHERE match(v, '^a'); +SELECT count() FROM mt_match_pk WHERE match(v, '^ab'); +SELECT count() FROM mt_match_pk WHERE match(v, '^a.'); +SELECT count() FROM mt_match_pk WHERE match(v, '^ab*'); +SELECT count() FROM mt_match_pk WHERE match(v, '^ac?'); From 25d35a97f9a9f19b00cb76834fc28d26e72c8dfd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Oct 2022 18:10:33 +0200 Subject: [PATCH 13/28] Update CCTZ to 2022f --- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index 7a454c25c7d..5c8528fb35e 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7a454c25c7d16053bcd327cdd16329212a08fa4a +Subproject commit 5c8528fb35e89ee0b3a7157490423fba0d4dd7b5 From c68ab231f917bc41b354f7d2a1fa769a7fbb8d06 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 30 Oct 2022 17:30:51 +0100 Subject: [PATCH 14/28] fix accessing part in Deleting state --- src/Storages/MergeTree/MergeTreeData.cpp | 11 ++++++--- src/Storages/MergeTree/MergeTreeData.h | 25 ++++++++++++++++++++- src/Storages/StorageReplicatedMergeTree.cpp | 23 ++++++++++--------- src/Storages/StorageReplicatedMergeTree.h | 2 +- 4 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index bb589161b57..d38c9c465d5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3142,7 +3142,7 @@ void MergeTreeData::removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(txn, drop_range, lock); } -MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( +MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper( MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -3220,15 +3220,20 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSetAn /// FIXME refactor 
removePartsFromWorkingSet(...), do not remove parts twice
     removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock);
 
+    /// Since we can return parts in Deleting state, we have to use a wrapper that restricts access to such parts.
+    PartsToRemoveFromZooKeeper parts_to_remove_from_zookeeper;
+    for (auto & part : parts_to_remove)
+        parts_to_remove_from_zookeeper.emplace_back(std::move(part));
+
     for (auto & part : inactive_parts_to_remove_immediately)
     {
         if (!drop_range.contains(part->info))
             continue;
         part->remove_time.store(0, std::memory_order_relaxed);
-        parts_to_remove.push_back(std::move(part));
+        parts_to_remove_from_zookeeper.emplace_back(std::move(part), /* was_active */ false);
     }
 
-    return parts_to_remove;
+    return parts_to_remove_from_zookeeper;
 }
 
 void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock)
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 2b67face570..60255ce1b16 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -584,10 +584,33 @@ public:
     /// Used in REPLACE PARTITION command.
     void removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
 
+    /// This wrapper is required to restrict access to parts in Deleting state
+    class PartToRemoveFromZooKeeper
+    {
+        DataPartPtr part;
+        bool was_active;
+
+    public:
+        PartToRemoveFromZooKeeper(DataPartPtr && part_, bool was_active_ = true)
+            : part(std::move(part_)), was_active(was_active_)
+        {
+        }
+
+        /// It's s to get name of any part
+        const String & getPartName() const { return part->name; }
+
+        DataPartPtr getPartIfItWasActive() const
+        {
+            return was_active ? part : nullptr;
+        }
+    };
+
+    using PartsToRemoveFromZooKeeper = std::vector<PartToRemoveFromZooKeeper>;
+
     /// Same as above, but also returns list of parts to remove from ZooKeeper.
     /// It includes parts that have been just removed by these method
     /// and Outdated parts covered by drop_range that were removed earlier for any reason.
-    DataPartsVector removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
+    PartsToRemoveFromZooKeeper removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(
         MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, DataPartsLock & lock);
 
     /// Restores Outdated part and adds it to working set
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 8dad5755dab..3c0fbb162bc 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1827,7 +1827,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
     /// Therefore, we use all data parts.
auto metadata_snapshot = getInMemoryMetadataPtr(); - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; { auto data_parts_lock = lockParts(); parts_to_remove = removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range_info, data_parts_lock); @@ -1849,8 +1849,11 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) /// If DETACH clone parts to detached/ directory for (const auto & part : parts_to_remove) { - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + if (auto part_to_detach = part.getPartIfItWasActive()) + { + LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); + part_to_detach->makeCloneInDetached("", metadata_snapshot); + } } } @@ -1941,7 +1944,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) PartDescriptions all_parts; PartDescriptions parts_to_add; - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; auto table_lock_holder_dst_table = lockForShare( RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1972,7 +1975,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) String parts_to_remove_str; for (const auto & part : parts_to_remove) { - parts_to_remove_str += part->name; + parts_to_remove_str += part.getPartName(); parts_to_remove_str += " "; } LOG_TRACE(log, "Replacing {} parts {}with empty set", parts_to_remove.size(), parts_to_remove_str); @@ -2248,7 +2251,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) String parts_to_remove_str; for (const auto & part : parts_to_remove) { - parts_to_remove_str += part->name; + parts_to_remove_str += part.getPartName(); parts_to_remove_str += " "; } LOG_TRACE(log, "Replacing {} parts {}with {} parts {}", parts_to_remove.size(), parts_to_remove_str, @@ -6230,11 +6233,11 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() } -void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(DataPartsVector & parts, size_t max_retries) +void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(PartsToRemoveFromZooKeeper & parts, size_t max_retries) { Strings part_names_to_remove; for (const auto & part : parts) - part_names_to_remove.emplace_back(part->name); + part_names_to_remove.emplace_back(part.getPartName()); return removePartsFromZooKeeperWithRetries(part_names_to_remove, max_retries); } @@ -6561,7 +6564,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( if (replace) clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.max_block, drop_range.max_block); - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; Coordination::Responses op_results; try @@ -6797,7 +6800,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.max_block, drop_range.max_block); - DataPartsVector parts_to_remove; + PartsToRemoveFromZooKeeper parts_to_remove; Coordination::Responses op_results; try diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 5213f963fdf..323b1ce02bf 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -549,7 +549,7 @@ private: /// Remove parts from ZooKeeper, throw exception if unable to do so after 
max_retries. void removePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries = 5); - void removePartsFromZooKeeperWithRetries(DataPartsVector & parts, size_t max_retries = 5); + void removePartsFromZooKeeperWithRetries(PartsToRemoveFromZooKeeper & parts, size_t max_retries = 5); /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. void removePartAndEnqueueFetch(const String & part_name); From 978aa16e29d67cfeed8abead1e99802983448699 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Sun, 30 Oct 2022 16:42:57 +0000 Subject: [PATCH 15/28] Fix a bug in ParserCreateUserQuery --- src/Parsers/Access/ParserCreateUserQuery.cpp | 5 ++--- .../0_stateless/02474_create_user_query_fuzzer_bug.reference | 0 .../0_stateless/02474_create_user_query_fuzzer_bug.sql | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.reference create mode 100644 tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 9e32b3c4618..ed6ecb62667 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -295,11 +295,11 @@ namespace } - bool parseHosts(IParserBase::Pos & pos, Expected & expected, const String & prefix, AllowedClientHosts & hosts) + bool parseHosts(IParserBase::Pos & pos, Expected & expected, std::string_view prefix, AllowedClientHosts & hosts) { return IParserBase::wrapParseImpl(pos, [&] { - if (!prefix.empty() && !ParserKeyword{prefix.c_str()}.ignore(pos, expected)) + if (!prefix.empty() && !ParserKeyword{prefix}.ignore(pos, expected)) return false; if (!ParserKeyword{"HOST"}.ignore(pos, expected)) @@ -492,7 +492,6 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (alter) { - String maybe_new_name; if (!new_name && (names->size() == 1) && parseRenameTo(pos, expected, new_name)) continue; diff --git a/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.reference b/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql b/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql new file mode 100644 index 00000000000..3ef1469cf1b --- /dev/null +++ b/tests/queries/0_stateless/02474_create_user_query_fuzzer_bug.sql @@ -0,0 +1 @@ +EXPLAIN AST ALTER user WITH a; -- { clientError SYNTAX_ERROR } From f53df7870c50dab252969a7bf37e3f776686888d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 30 Oct 2022 17:59:47 +0100 Subject: [PATCH 16/28] fix race between drop and failed insert --- src/Storages/MergeTree/MergeTreeData.cpp | 25 +++++++++++++++++++++--- src/Storages/MergeTree/MergeTreeData.h | 3 +++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d38c9c465d5..a9e726f25fd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2199,6 +2199,7 @@ void MergeTreeData::dropAllData() LOG_TRACE(log, "dropAllData: removing all data parts from memory."); data_parts_indexes.clear(); + all_data_dropped = true; } catch (...) 
{ @@ -5181,9 +5182,27 @@ void MergeTreeData::Transaction::rollback() buf << "."; LOG_DEBUG(data.log, "Undoing transaction.{}", buf.str()); - data.removePartsFromWorkingSet(txn, - DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), - /* clear_without_timeout = */ true); + auto lock = data.lockParts(); + + if (data.data_parts_indexes.empty()) + { + /// Table was dropped concurrently and all parts (including PreActive parts) were cleared, so there's nothing to rollback + if (!data.all_data_dropped) + { + Strings part_names; + for (const auto & part : precommitted_parts) + part_names.emplace_back(part->name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are some PreActive parts ({}) to rollback, " + "but data parts set is empty and table {} was not dropped. It's a bug", + fmt::join(part_names, ", "), data.getStorageID().getNameForLogs()); + } + } + else + { + data.removePartsFromWorkingSet(txn, + DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), + /* clear_without_timeout = */ true, &lock); + } } clear(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 60255ce1b16..40eaa679845 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -663,6 +663,9 @@ public: /// Deletes the data directory and flushes the uncompressed blocks cache and the marks cache. void dropAllData(); + /// This flag is for hardening and assertions. + bool all_data_dropped = false; + /// Drop data directories if they are empty. It is safe to call this method if table creation was unsuccessful. void dropIfEmpty(); From 620caeb07c9a09f855c3deb65e30de2674b4504f Mon Sep 17 00:00:00 2001 From: Gabriel Date: Mon, 31 Oct 2022 13:20:58 +0800 Subject: [PATCH 17/28] Fix typo in comments --- src/Processors/Executors/ExecutingGraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 651ede10cfd..c7e89d7547c 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -71,7 +71,7 @@ bool ExecutingGraph::addEdges(uint64_t node) } } - /// Add direct edges form output ports. + /// Add direct edges from output ports. 
auto & outputs = from->getOutputs(); auto from_output = nodes[node]->direct_edges.size(); From b62170426a43702bc5f9ad8854a9b468fe4788b3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 31 Oct 2022 10:48:37 +0100 Subject: [PATCH 18/28] Fix compilation of LLVM with cmake cache Simple reproducer: $ cmake $ ninja contrib/llvm-project/llvm/lib/MC/MCParser/CMakeFiles/LLVMMCParser.dir/MasmParser.cpp.o # will have -std=c++14 $ touch CMakeLists.txt $ cmake $ ninja contrib/llvm-project/llvm/lib/MC/MCParser/CMakeFiles/LLVMMCParser.dir/MasmParser.cpp.o # will have -std=c++20 and fail (fails because std::vector cannot work with opaque types anymore) Fixes: #42249 (cc @rschu1ze) --- contrib/llvm-project-cmake/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 6a73ae0f0c6..7af4a23bc9d 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -21,6 +21,9 @@ set (LLVM_INCLUDE_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include" ) set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") +# NOTE: You should not remove this line since otherwise it will use default 20, +# and llvm cannot be compiled with bundled libcxx and 20 standard. +set (CMAKE_CXX_STANDARD 14) # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. set (REQUIRED_LLVM_LIBRARIES From b677e68c4eadcc7b59e6a6094506cfe23cc19707 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Mon, 31 Oct 2022 12:46:14 +0100 Subject: [PATCH 19/28] Update column.md --- docs/en/sql-reference/statements/alter/column.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index f36aa1357f4..cc278465437 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -107,7 +107,7 @@ ALTER TABLE visits RENAME COLUMN webBrowser TO browser CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name ``` -Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to specify the partition expression](#alter-how-to-specify-part-expr). +Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](partition.md#how-to-set-partition-expression). If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist. 
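To make the `CLEAR COLUMN` behaviour documented above concrete, here is a minimal, hypothetical usage sketch (the table `visits`, column `browser`, and partition `201901` are illustrative, not taken from the patch):

``` sql
-- Reset all values of the `browser` column in one partition to defaults.
-- IF EXISTS turns the statement into a no-op if the column is missing.
ALTER TABLE visits CLEAR COLUMN IF EXISTS browser IN PARTITION 201901;
```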
From e43ecf9ca0f9c66661b706a2f80896d43264daa1 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Mon, 31 Oct 2022 12:52:31 +0100 Subject: [PATCH 20/28] Link to proper place in docs --- docs/en/sql-reference/statements/optimize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 680ff773992..036d3f0599a 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -22,7 +22,7 @@ The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/me When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`). - If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. -- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](../../sql-reference/statements/alter/index.md#alter-how-to-specify-part-expr). +- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression). - If you specify `FINAL`, optimization is performed even when all the data is already in one part. Also merge is forced even if concurrent merges are performed. - If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine. 
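As an illustrative sketch of the statement this documentation change covers (table and partition names are hypothetical):

``` sql
-- Force a merge of partition 201902 into a single part even if it is
-- already merged, and drop fully identical rows while doing so.
OPTIMIZE TABLE visits PARTITION 201902 FINAL DEDUPLICATE;
```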
From b2b9479afa810ce7e69f820ecc4a3e0b5db4e6a7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 31 Oct 2022 16:15:35 +0300 Subject: [PATCH 21/28] Update src/Storages/MergeTree/MergeTreeData.h Co-authored-by: Sergei Trifonov --- src/Storages/MergeTree/MergeTreeData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 40eaa679845..cfad11b8d36 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -591,7 +591,7 @@ public: bool was_active; public: - PartToRemoveFromZooKeeper(DataPartPtr && part_, bool was_active_ = true) + explicit PartToRemoveFromZooKeeper(DataPartPtr && part_, bool was_active_ = true) : part(std::move(part_)), was_active(was_active_) { } From 599ccb99396a84151b280bdf809ff54d04e8c38a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 31 Oct 2022 16:18:17 +0300 Subject: [PATCH 22/28] Update MergeTreeData.h --- src/Storages/MergeTree/MergeTreeData.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index cfad11b8d36..99ba6991e43 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -596,7 +596,7 @@ public: { } - /// It's s to get name of any part + /// It's safe to get name of any part const String & getPartName() const { return part->name; } DataPartPtr getPartIfItWasActive() const From 9fb6f5228675add4662c52cacb70d38dd571776d Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Mon, 31 Oct 2022 11:02:08 -0400 Subject: [PATCH 23/28] Fix Missing Quotes - Sonar Nightly Signed-off-by: Julio Jimenez --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index e6da4df7200..5a208807c81 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -149,7 +149,7 @@ jobs: env: BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip run: | - curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip ${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" + curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/" echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH" - name: Set Up Build Tools From 440cc51a7e7aa6df4bde96c895965048a710f3c1 Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Mon, 31 Oct 2022 14:11:52 -0400 Subject: [PATCH 24/28] Fix Missing Env Vars - Sonar Nightly Signed-off-by: Julio Jimenez --- .github/workflows/nightly.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 5a208807c81..904c0dffe57 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -128,6 +128,8 @@ jobs: SONAR_SCANNER_VERSION: 4.7.0.2747 SONAR_SERVER_URL: "https://sonarcloud.io" BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed + CC: clang-15 + CXX: clang++-15 steps: - uses: actions/checkout@v2 with: From d18d08bcc78431977abe735e3bc6793e6e2308c2 Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Tue, 1 Nov 2022 04:36:15 +0800 Subject: [PATCH 25/28] Remove short-circuit evaluation in AssociativeApplierImpl::apply (#42214) The short-circuit evaluation was implemented when applying the saturable 
operators (and, or) on a vector of ColumnUInt8. However, its control flow would be compiled as a series of conditional branch instructions which are hard to predict by the hardware and at the same time hinder the vectorization optimization by the compiler. This commit removes the short-circuit and evaluates the whole expression. --- src/Functions/FunctionsLogical.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 2ac7688737f..7e52c55e5b0 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -168,10 +168,7 @@ public: inline ResultValueType apply(const size_t i) const { const auto a = !!vec[i]; - if constexpr (Op::isSaturable()) - return Op::isSaturatedValue(a) ? a : Op::apply(a, next.apply(i)); - else - return Op::apply(a, next.apply(i)); + return Op::apply(a, next.apply(i)); } private: From 439ddc2bf7d07122cd15d8054b7c9400ad29578e Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Mon, 31 Oct 2022 16:53:58 -0400 Subject: [PATCH 26/28] exclude java Signed-off-by: Julio Jimenez --- .github/workflows/nightly.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 904c0dffe57..94fe404ea97 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -175,4 +175,5 @@ jobs: --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \ --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ --define sonar.projectKey="ClickHouse_ClickHouse" \ - --define sonar.organization="clickhouse-java" + --define sonar.organization="clickhouse-java" \ + --define sonar.exclusions="**/*.java" From baf0e92e60185ade936ec0caa4ff0cd2960d6751 Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Mon, 31 Oct 2022 17:58:43 -0400 Subject: [PATCH 27/28] exclude more stuff Signed-off-by: Julio Jimenez --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 94fe404ea97..612bb1f8f9b 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -176,4 +176,4 @@ jobs: --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ --define sonar.projectKey="ClickHouse_ClickHouse" \ --define sonar.organization="clickhouse-java" \ - --define sonar.exclusions="**/*.java" + --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" From 70eaf69df176a3cc2dee84f88fbb22fc1185478b Mon Sep 17 00:00:00 2001 From: Alexander Yakovlev <33040934+AlexJameson@users.noreply.github.com> Date: Tue, 1 Nov 2022 02:57:55 +0300 Subject: [PATCH 28/28] Fix a typo in table-engines/integrations/s3.md --- docs/en/engines/table-engines/integrations/s3.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 986a29b8307..db983ab9c68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -139,7 +139,7 @@ The following settings can be specified in configuration file for given endpoint - `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`. 
- `region` — Specifies S3 region name. Optional. - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`. -- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional.
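To tie the settings above together, a minimal sketch of the S3 engine in use (the bucket URL and schema are hypothetical; per-endpoint options such as `header`, `region`, or `use_environment_credentials` would be matched against this URL prefix in the server configuration):

``` sql
CREATE TABLE s3_engine_table (name String, value UInt32)
    ENGINE = S3('https://storage.example.com/my-bucket/data.csv.gz', 'CSV', 'gzip');

SELECT * FROM s3_engine_table LIMIT 5;
```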