From cea82aab5970eeddad04cbabc27407c0c1dc0ff9 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 26 May 2024 20:43:49 +0000 Subject: [PATCH 01/44] add dynamic untracked memory limits for more precise memory tracking --- src/Common/CurrentMemoryTracker.cpp | 9 +++++++++ src/Common/CurrentMemoryTracker.h | 2 ++ src/Common/ThreadStatus.h | 12 ++++++++++++ src/Core/Settings.h | 1 + src/Interpreters/ThreadStatusExt.cpp | 10 +++++++--- 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 02c7dc6e224..6166119eccf 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -57,6 +57,7 @@ AllocationTrace CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory { auto res = memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); current_thread->untracked_memory = 0; + current_thread->updateUntrackedMemoryLimit(memory_tracker->get()); return res; } else @@ -84,6 +85,13 @@ void CurrentMemoryTracker::check() std::ignore = memory_tracker->allocImpl(0, true); } +Int64 CurrentMemoryTracker::get() +{ + if (auto * memory_tracker = getMemoryTracker()) + return memory_tracker->get(); + return 0; +} + AllocationTrace CurrentMemoryTracker::alloc(Int64 size) { bool throw_if_memory_exceeded = true; @@ -107,6 +115,7 @@ AllocationTrace CurrentMemoryTracker::free(Int64 size) { Int64 untracked_memory = current_thread->untracked_memory; current_thread->untracked_memory = 0; + current_thread->updateUntrackedMemoryLimit(memory_tracker->get() + untracked_memory); return memory_tracker->free(-untracked_memory); } } diff --git a/src/Common/CurrentMemoryTracker.h b/src/Common/CurrentMemoryTracker.h index 18a1e3f49b1..401eeed93dd 100644 --- a/src/Common/CurrentMemoryTracker.h +++ b/src/Common/CurrentMemoryTracker.h @@ -12,7 +12,9 @@ struct CurrentMemoryTracker /// This function should be called after memory deallocation. [[nodiscard]] static AllocationTrace free(Int64 size); + static void check(); + [[nodiscard]] static Int64 get(); /// Throws MEMORY_LIMIT_EXCEEDED (if it's allowed to throw exceptions) static void injectFault(); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 0c02ab8fdb0..04fb568540b 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -183,6 +183,12 @@ public: Int64 untracked_memory = 0; /// Each thread could new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters. Int64 untracked_memory_limit = 4 * 1024 * 1024; + /// To keep total untracked memory limited to `untracked_memory_ratio * RSS` we have to account threads with small and large memory footprint differently. 
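
The rule that this comment block goes on to spell out can also be illustrated outside of the patch. The Python sketch below is an editorial illustration only, not ClickHouse code (the function name and the standalone framing are assumptions); it uses the 1/16 ratio (`untracked_memory_ratio_bits = 4`) together with the 4 KiB default of the new `min_untracked_memory` setting and the 4 MiB default of the existing `max_untracked_memory` setting.

    # Editorial sketch: derive a per-thread untracked-memory limit from the memory
    # the thread currently has tracked. The 4-bit shift corresponds to
    # untracked_memory_ratio = 1/16 = 6.25%.
    def dynamic_untracked_limit(current_memory_bytes: int,
                                min_untracked: int = 4 * 1024,         # 4 KiB floor
                                max_untracked: int = 4 * 1024 * 1024,  # 4 MiB ceiling
                                ) -> int:
        proposed = current_memory_bytes >> 4   # 6.25% of the thread's tracked memory
        return max(min_untracked, min(proposed, max_untracked))

    print(dynamic_untracked_limit(16 * 1024))    # 4096: a tiny thread reports to the shared counters almost immediately
    print(dynamic_untracked_limit(1024 * 1024))  # 65536: a mid-size thread gets 64 KiB of slack
    print(dynamic_untracked_limit(1 << 30))      # 4194304: a large thread keeps the old 4 MiB cap

Later in this series the deallocation path deliberately keeps comparing the accumulated untracked memory against the upper bound rather than against this dynamic limit, which adds hysteresis and avoids track/untrack cycles for threads hovering around the threshold.
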
+ /// For this purpose we dynamically change `untracked_memory_limit` after every tracking event using a simple formula: + /// untracked_memory_limit = clamp(untracked_memory_ratio * cur_memory_bytes, min_untracked_memory, max_untracked_memory) + /// Note that this values are updated when thread is attached to a group + Int64 min_untracked_memory = 4 * 1024 * 1024; + Int64 max_untracked_memory = 4 * 1024; /// Statistics of read and write rows/bytes Progress progress_in; @@ -309,6 +315,12 @@ public: void initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period); + void updateUntrackedMemoryLimit(Int64 current) + { + constexpr Int64 untracked_memory_ratio_bits = 4; // untracked_memory_ratio = 1.0 / (1 << untracked_memory_ratio_bits) = 1.0 / 16 = 6.25% + untracked_memory_limit = std::clamp(current >> untracked_memory_ratio_bits, min_untracked_memory, max_untracked_memory); + } + private: void applyGlobalSettings(); void applyQuerySettings(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f0389e7e2d5..28b068b9e37 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -489,6 +489,7 @@ class IColumn; M(UInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \ M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ + M(UInt64, min_untracked_memory, (4 * 1024), "Lower bound for untracked memory limit which is applied to threads with low memory consumption. Untracked memory limit equals thread_memory_usage/16 and clamped between min_untracked_memory and max_untracked_memory for every thread.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. 
You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 9ca521a4ab3..981c7d45d8e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -210,9 +211,12 @@ void ThreadStatus::applyQuerySettings() query_id_from_query_context = query_context_ptr->getCurrentQueryId(); initQueryProfiler(); - untracked_memory_limit = settings.max_untracked_memory; - if (settings.memory_profiler_step && settings.memory_profiler_step < static_cast(untracked_memory_limit)) - untracked_memory_limit = settings.memory_profiler_step; + max_untracked_memory = settings.max_untracked_memory; + if (settings.memory_profiler_step && settings.memory_profiler_step < static_cast(max_untracked_memory)) + max_untracked_memory = settings.memory_profiler_step; + min_untracked_memory = std::min(settings.min_untracked_memory, max_untracked_memory); + + updateUntrackedMemoryLimit(CurrentMemoryTracker::get()); #if defined(OS_LINUX) /// Set "nice" value if required. From e0c8ae8f4baf9a9571aaa02e7d8a06610cf91d9e Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 27 May 2024 10:44:14 +0000 Subject: [PATCH 02/44] fix tests --- src/Core/SettingsChangesHistory.h | 1 + tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 16f28d94640..4c087060179 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -106,6 +106,7 @@ static std::map sett {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, + {"min_untracked_memory", 4_MiB, 4_KiB, "A new setting."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index 69bd15e3f54..68472a93c9c 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -7,7 +7,8 @@ -- sizeof(HLL) is (2^K * 6 / 8) -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 -SET use_uncompressed_cache = 0; +SET use_uncompressed_cache = 0; +SET min_untracked_memory = 4194304; -- 4MiB -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; From 54735e6292ebbce528a4a0681d294ac56c71cbb5 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 27 May 2024 17:52:09 +0000 Subject: [PATCH 03/44] fix --- src/Common/ThreadStatus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 04fb568540b..49594116b91 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -187,8 +187,8 @@ public: /// For this purpose we dynamically change `untracked_memory_limit` after every tracking event using a 
simple formula: /// untracked_memory_limit = clamp(untracked_memory_ratio * cur_memory_bytes, min_untracked_memory, max_untracked_memory) /// Note that this values are updated when thread is attached to a group - Int64 min_untracked_memory = 4 * 1024 * 1024; - Int64 max_untracked_memory = 4 * 1024; + Int64 min_untracked_memory = 4 * 1024; + Int64 max_untracked_memory = 4 * 1024 * 1024; /// Statistics of read and write rows/bytes Progress progress_in; From c973addee64c4dba156ad6ea741afdf97e8a46cd Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 27 May 2024 19:13:56 +0000 Subject: [PATCH 04/44] disable precise memory tracking for some tests --- tests/integration/test_settings_constraints_distributed/test.py | 2 +- .../0_stateless/03030_system_flush_distributed_settings.sql | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index fbebbac276e..295347192bd 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -136,7 +136,7 @@ def test_select_clamps_settings(): ) assert ( - distributed.query(query, settings={"max_memory_usage": 1}) + distributed.query(query, settings={"max_memory_usage": 1, "min_untracked_memory": 4194304}) == "node1\tmax_memory_usage\t11111111\n" "node1\treadonly\t0\n" "node2\tmax_memory_usage\t0\n" diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql index da2a387e07c..e8a3da174a6 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql @@ -13,6 +13,8 @@ create table dist_out as data engine=Distributed(test_shard_localhost, currentDa set prefer_localhost_replica=0; +set min_untracked_memory='4Mi' -- Disable precise memory tracking + insert into dist_in select number/100, number from system.numbers limit 1e6 settings max_memory_usage='20Mi'; system flush distributed dist_in; -- { serverError MEMORY_LIMIT_EXCEEDED } system flush distributed dist_in settings max_memory_usage=0; From 18dce4169f1b3a3692f4975fb688a3b137b547c4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 27 May 2024 19:22:24 +0000 Subject: [PATCH 05/44] Automatic style fix --- .../integration/test_settings_constraints_distributed/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index 295347192bd..a1f44af1069 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -136,7 +136,9 @@ def test_select_clamps_settings(): ) assert ( - distributed.query(query, settings={"max_memory_usage": 1, "min_untracked_memory": 4194304}) + distributed.query( + query, settings={"max_memory_usage": 1, "min_untracked_memory": 4194304} + ) == "node1\tmax_memory_usage\t11111111\n" "node1\treadonly\t0\n" "node2\tmax_memory_usage\t0\n" From 1c9f4da6b081832c61842beb2a40c209beb2e5b7 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 28 May 2024 11:16:32 +0000 Subject: [PATCH 06/44] turn off dynamic untracked limit memory for not-attached threads (clients and tests) --- src/Common/ThreadStatus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 49594116b91..db4854da707 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -187,7 +187,7 @@ public: /// For this purpose we dynamically change `untracked_memory_limit` after every tracking event using a simple formula: /// untracked_memory_limit = clamp(untracked_memory_ratio * cur_memory_bytes, min_untracked_memory, max_untracked_memory) /// Note that this values are updated when thread is attached to a group - Int64 min_untracked_memory = 4 * 1024; + Int64 min_untracked_memory = 4 * 1024 * 1024; // Default value is kept 4MB mostly for tests and client (should be changed to 4KB as default value a setting) Int64 max_untracked_memory = 4 * 1024 * 1024; /// Statistics of read and write rows/bytes From d07c6461e2d480cad7d95aeceed070f78d42bfc5 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 28 May 2024 14:17:33 +0000 Subject: [PATCH 07/44] fix syntax error --- .../0_stateless/03030_system_flush_distributed_settings.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql index e8a3da174a6..7961444dbc2 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql @@ -13,7 +13,7 @@ create table dist_out as data engine=Distributed(test_shard_localhost, currentDa set prefer_localhost_replica=0; -set min_untracked_memory='4Mi' -- Disable precise memory tracking +set min_untracked_memory='4Mi'; -- Disable precise memory tracking insert into dist_in select number/100, number from system.numbers limit 1e6 settings max_memory_usage='20Mi'; system flush distributed dist_in; -- { serverError MEMORY_LIMIT_EXCEEDED } From 28e71af95cb4008ce791dceaf381d84e32d716e5 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 29 May 2024 14:55:32 +0000 Subject: [PATCH 08/44] disable precise memory tracking to avoid memory_exceeded exception in test --- tests/integration/test_failed_async_inserts/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_failed_async_inserts/test.py b/tests/integration/test_failed_async_inserts/test.py index ecb506c36bc..3a6159107ac 100644 --- a/tests/integration/test_failed_async_inserts/test.py +++ b/tests/integration/test_failed_async_inserts/test.py @@ -46,7 +46,7 @@ def test_failed_async_inserts(started_cluster): ) select_query = ( - "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery'" + "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery' SETTINGS min_untracked_memory = 4194304" ) assert node.query(select_query) == "4\n" From c083896c590d547e4ed3649259d4ef4b00fd91d0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 29 May 2024 15:06:31 +0000 Subject: [PATCH 09/44] Automatic style fix --- tests/integration/test_failed_async_inserts/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_failed_async_inserts/test.py b/tests/integration/test_failed_async_inserts/test.py index 3a6159107ac..2bb56b250ea 100644 --- a/tests/integration/test_failed_async_inserts/test.py +++ b/tests/integration/test_failed_async_inserts/test.py @@ -45,9 +45,7 @@ def test_failed_async_inserts(started_cluster): ignore_error=True, ) - select_query = ( - "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery' SETTINGS 
min_untracked_memory = 4194304" - ) + select_query = "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery' SETTINGS min_untracked_memory = 4194304" assert node.query(select_query) == "4\n" From 47b45fdc1fc8521ad91a69677b1cb398771b2bfb Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 30 May 2024 16:46:13 +0000 Subject: [PATCH 10/44] add hysteresis of untracked memory --- src/Common/CurrentMemoryTracker.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 6166119eccf..b1dcded0b23 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -111,7 +111,8 @@ AllocationTrace CurrentMemoryTracker::free(Int64 size) if (current_thread) { current_thread->untracked_memory -= size; - if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) + // Note that we use `max_untracked_memory` and not `untracked_memory_limit` to create hysteresis to avoid track/untrack cycles + if (current_thread->untracked_memory < -current_thread->max_untracked_memory) { Int64 untracked_memory = current_thread->untracked_memory; current_thread->untracked_memory = 0; From 3f74783302f545971b0ec7bfec954e91209dc0b6 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 31 May 2024 09:11:58 +0000 Subject: [PATCH 11/44] adjust settings history changes --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4c087060179..ecb4960a06a 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,6 +92,7 @@ static std::map sett {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"min_untracked_memory", 4_MiB, 4_KiB, "A new setting."}, }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, @@ -106,7 +107,6 @@ static std::map sett {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - {"min_untracked_memory", 4_MiB, 4_KiB, "A new setting."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From 6a8adb6d487db7789f2c2f4f72103cb5e14b2281 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 3 Jun 2024 16:34:53 +0200 Subject: [PATCH 12/44] Refactor change and add failing test case --- src/Common/StringUtils.h | 12 ++++++++++++ src/Common/UTF8Helpers.cpp | 18 +++++++++++++++--- .../03142_skip_ANSI_in_UTF8_compute_width.sql | 7 ++++++- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/src/Common/StringUtils.h b/src/Common/StringUtils.h index fe5fc3c058f..e4c7ab3e80c 100644 --- a/src/Common/StringUtils.h +++ b/src/Common/StringUtils.h @@ -140,6 +140,18 @@ inline bool isPrintableASCII(char c) return uc >= 32 && uc <= 126; /// 127 is ASCII DEL. } +inline bool isCSIParameterByte(char c) +{ + uint8_t uc = c; + return uc >= 0x30 && uc <= 0x3F; /// ASCII 0–9:;<=>? +} + +inline bool isCSIIntermediateByte(char c) +{ + uint8_t uc = c; + return uc >= 0x20 && uc <= 0x2F; /// ASCII !"#$%&'()*+,-./ +} + inline bool isCSIFinalByte(char c) { uint8_t uc = c; diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index 8c8c8e8327b..34eba832113 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -147,10 +147,22 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l while (i < size && isPrintableASCII(data[i])) { - if (!isEscapeSequence) + auto isParameterByte = isCSIParameterByte(data[i]); + auto isIntermediateByte = isCSIIntermediateByte(data[i]); + auto ignore_width = isEscapeSequence & (isParameterByte || isIntermediateByte); + + if (ignore_width || (data[i] == '[' && isEscapeSequence)) + { + /// don't count the width + } + else if (isEscapeSequence && isCSIFinalByte(data[i])) + { + isEscapeSequence = false; + } + else + { ++width; - else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b') - isEscapeSequence = false; /// end of CSI escape sequence reached + } ++i; } diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql index e37b0db08e9..f4b0bfe5888 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql @@ -1 +1,6 @@ -SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 0) AS x FORMAT Pretty; +SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; +SELECT format('\x1b[38;2;{0};{1};{2}m█ test \x1b[0m', 255, 128, 128) AS x; +SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test', 255, 128, 128) AS x; +SELECT format('test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; +SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; +SELECT visibleWidth('0};{1};{2}m█'); \ No newline at end of file From acfe2876b57aa4766e15df4a955991c19eb9dc8e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 3 Jun 2024 21:06:02 +0200 Subject: [PATCH 13/44] Fix OrderByLimitByDuplicateEliminationVisitor across subqueries --- ...OrderByLimitByDuplicateEliminationPass.cpp | 7 +--- .../03165_order_by_duplicate.reference | 39 +++++++++++++++++++ .../0_stateless/03165_order_by_duplicate.sql | 16 ++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03165_order_by_duplicate.reference create mode 100644 tests/queries/0_stateless/03165_order_by_duplicate.sql diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp index 26ca5984b49..15919c4a2fe 100644 --- a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp @@ -22,6 +22,7 @@ public: if (query_node->hasOrderBy()) { + QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; QueryTreeNodes result_nodes; auto & query_order_by_nodes = query_node->getOrderBy().getNodes(); @@ -45,10 +46,9 @@ public: query_order_by_nodes = std::move(result_nodes); } - unique_expressions_nodes_set.clear(); - if (query_node->hasLimitBy()) { + QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; QueryTreeNodes result_nodes; auto & query_limit_by_nodes = query_node->getLimitBy().getNodes(); @@ -63,9 +63,6 @@ public: query_limit_by_nodes = std::move(result_nodes); } } - -private: - QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; }; } diff --git a/tests/queries/0_stateless/03165_order_by_duplicate.reference b/tests/queries/0_stateless/03165_order_by_duplicate.reference new file mode 100644 index 00000000000..5d5e7a33f4a --- /dev/null +++ b/tests/queries/0_stateless/03165_order_by_duplicate.reference @@ -0,0 +1,39 @@ +QUERY id: 0 + PROJECTION COLUMNS + id UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, alias: __table1, table_name: default.test, final: 1 + WHERE + FUNCTION id: 4, function_name: in, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + QUERY id: 6, is_subquery: 1, is_distinct: 1 + PROJECTION COLUMNS + id UInt64 + PROJECTION + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: id, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test, final: 1 + ORDER BY + LIST id: 10, nodes: 1 + SORT id: 11, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 8, column_name: id, result_type: UInt64, source_id: 9 + LIMIT + CONSTANT id: 12, constant_value: UInt64_4, constant_value_type: UInt64 + ORDER BY + LIST id: 13, nodes: 1 + SORT id: 14, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + LIMIT BY LIMIT + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt64 + LIMIT BY + LIST id: 16, nodes: 1 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/03165_order_by_duplicate.sql b/tests/queries/0_stateless/03165_order_by_duplicate.sql new file mode 100644 index 00000000000..0054cbc36a6 --- /dev/null +++ b/tests/queries/0_stateless/03165_order_by_duplicate.sql @@ -0,0 +1,16 @@ +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM 
numbers(100); + +EXPLAIN QUERY TREE SELECT id +FROM test FINAL +WHERE id IN ( + SELECT DISTINCT id + FROM test FINAL + ORDER BY id ASC + LIMIT 4 +) +ORDER BY id ASC +LIMIT 1 BY id +SETTINGS allow_experimental_analyzer = 1; From 8b9bb1d47309c2ca927b9d50026b7dcc9be7b164 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 4 Jun 2024 14:09:32 +0200 Subject: [PATCH 14/44] Fix incorrect width calculation --- src/Common/UTF8Helpers.cpp | 18 ++++++++---------- .../03142_skip_ANSI_in_UTF8_compute_width.sql | 7 ++----- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index 34eba832113..006ec33c08b 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -116,6 +116,11 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l while (i + 15 < size) { + if (isEscapeSequence) + { + break; + } + __m128i bytes = _mm_loadu_si128(reinterpret_cast(&data[i])); const uint16_t non_regular_width_mask = _mm_movemask_epi8( @@ -132,15 +137,8 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l } else { - if (isEscapeSequence) - { - break; - } - else - { - i += 16; - width += 16; - } + i += 16; + width += 16; } } #endif @@ -149,7 +147,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l { auto isParameterByte = isCSIParameterByte(data[i]); auto isIntermediateByte = isCSIIntermediateByte(data[i]); - auto ignore_width = isEscapeSequence & (isParameterByte || isIntermediateByte); + auto ignore_width = isEscapeSequence && (isParameterByte || isIntermediateByte); if (ignore_width || (data[i] == '[' && isEscapeSequence)) { diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql index f4b0bfe5888..812e7124526 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql @@ -1,6 +1,3 @@ SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; -SELECT format('\x1b[38;2;{0};{1};{2}m█ test \x1b[0m', 255, 128, 128) AS x; -SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test', 255, 128, 128) AS x; -SELECT format('test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; -SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; -SELECT visibleWidth('0};{1};{2}m█'); \ No newline at end of file +SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x +SELECT visibleWidth(format('\x1b[38;2;{0};{1};{2}m█\x1b[0m',255,128,128)); From 54a9daa57007550fc253bd64dce3114331a211fd Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 4 Jun 2024 14:15:14 +0200 Subject: [PATCH 15/44] Update reference file --- .../03142_skip_ANSI_in_UTF8_compute_width.reference | 12 +++++++++++- .../03142_skip_ANSI_in_UTF8_compute_width.sql | 6 +++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference index 864f62d3113..fa161970a3d 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference @@ -1,5 +1,15 @@ ┏━━━┓ ┃ x ┃ ┡━━━┩ -1. │ █ │ +1. │ █ │ └───┘ + ┏━━━━━━━━━┳━━━━━━━━━━┓ + ┃ 'Hello' ┃ x ┃ + ┡━━━━━━━━━╇━━━━━━━━━━┩ +1. 
│ Hello │ █ test █ │ + └─────────┴──────────┘ + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ visibleWidth(format('[38;2;{0};{1};{2}m█', 255, 128, 128)) ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 22 │ + └─────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql index 812e7124526..17608655ec5 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql @@ -1,3 +1,3 @@ -SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x; -SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x -SELECT visibleWidth(format('\x1b[38;2;{0};{1};{2}m█\x1b[0m',255,128,128)); +SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty; +SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty; +SELECT visibleWidth(format('\x1b[38;2;{0};{1};{2}m█\x1b[0m',255,128,128)) FORMAT Pretty; From 252b5f51c2f8e5a6f41d21245340fae9782445c1 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 4 Jun 2024 15:07:44 +0200 Subject: [PATCH 16/44] update test --- .../03142_skip_ANSI_in_UTF8_compute_width.reference | 5 ----- .../0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql | 1 - 2 files changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference index fa161970a3d..6d375fd471a 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.reference @@ -8,8 +8,3 @@ ┡━━━━━━━━━╇━━━━━━━━━━┩ 1. │ Hello │ █ test █ │ └─────────┴──────────┘ - ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ visibleWidth(format('[38;2;{0};{1};{2}m█', 255, 128, 128)) ┃ - ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. 
│ 22 │ - └─────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql index 17608655ec5..49f689a4cc5 100644 --- a/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql +++ b/tests/queries/0_stateless/03142_skip_ANSI_in_UTF8_compute_width.sql @@ -1,3 +1,2 @@ SELECT format('\x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty; SELECT 'Hello', format('\x1b[38;2;{0};{1};{2}m█\x1b[0m test \x1b[38;2;{0};{1};{2}m█\x1b[0m', 255, 128, 128) AS x FORMAT Pretty; -SELECT visibleWidth(format('\x1b[38;2;{0};{1};{2}m█\x1b[0m',255,128,128)) FORMAT Pretty; From 1d77cda70b2db1041a89f7bf7537e96795084dae Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 4 Jun 2024 17:13:19 +0000 Subject: [PATCH 17/44] Fix distributed array join by nested --- src/Analyzer/ArrayJoinNode.cpp | 8 +++++++- ...6_analyzer_array_join_distributed.reference | 2 ++ .../03156_analyzer_array_join_distributed.sql | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 27d7229d46a..0cfb5d80b2a 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -64,7 +65,12 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto * column_node = array_join_expression->as(); if (column_node && column_node->getExpression()) - array_join_expression_ast = column_node->getExpression()->toAST(options); + { + if (const auto * function_node = column_node->getExpression()->as(); function_node && function_node->getFunctionName() == "nested") + array_join_expression_ast = array_join_expression->toAST(options); + else + array_join_expression_ast = column_node->getExpression()->toAST(options); + } else array_join_expression_ast = array_join_expression->toAST(options); diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference index b5b2aec9c12..18830a293bd 100644 --- a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference @@ -10,3 +10,5 @@ Hello 1 Hello 1 Hello 2 Hello 2 +2020-01-01 a 2 +2020-01-01 b 4 diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql index f605a369822..55f9877b2ac 100644 --- a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql @@ -8,3 +8,21 @@ SELECT s, arr, a FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) AR SELECT s, arr FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; SELECT s, arr FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; + +create table hourly( + hour datetime, + `metric.names` Array(String), + `metric.values` Array(Int64) +) Engine=Memory +as select '2020-01-01', ['a', 'b'], [1,2]; + +SELECT + toDate(hour) AS day, + `metric.names`, + sum(`metric.values`) +FROM remote('127.0.0.{1,2}', currentDatabase(), hourly) +ARRAY JOIN metric +GROUP BY + day, + metric.names +ORDER BY metric.names; From 09c2151f3b0e2e19a1a1f77e27d3677e95b17fb0 
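
Patch 18 below returns to src/Common/UTF8Helpers.cpp, whose width logic the StringUtils and UTF8Helpers changes in patches 12 and 14 adjust: bytes between an ESC character and the CSI final byte must contribute nothing to the printed width. The Python sketch below is an editorial illustration only, not the ClickHouse implementation (which also handles wide characters, tabs and a SIMD fast path); the function name and the one-column-per-character simplification are assumptions. The byte classes follow the helpers added in patch 12: parameter bytes 0x30-0x3F, intermediate bytes 0x20-0x2F, and a final byte in the 0x40-0x7E range (per ECMA-48) that ends the sequence.

    # Editorial sketch: visible width of a string once ANSI CSI sequences
    # such as "\x1b[38;2;255;128;128m" are skipped.
    def visible_width(s: str) -> int:
        width = 0
        in_escape = False
        for ch in s:
            code = ord(ch)
            if in_escape:
                if ch == '[' or 0x20 <= code <= 0x3F:   # '[', intermediate or parameter byte
                    continue
                in_escape = False                        # final byte (or malformed sequence)
                if 0x40 <= code <= 0x7E:                 # e.g. 'm' terminates the sequence
                    continue
            if ch == '\x1b':
                in_escape = True
                continue
            width += 1                                   # simplification: one column per character
        return width

    assert visible_width("\x1b[38;2;255;128;128m\u2588\x1b[0m") == 1
    assert visible_width("\x1b[38;2;255;128;128m\u2588\x1b[0m test \x1b[38;2;255;128;128m\u2588\x1b[0m") == 8
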
Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jun 2024 03:08:58 +0200 Subject: [PATCH 18/44] Fix style --- src/Common/UTF8Helpers.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index 006ec33c08b..bfa860af98a 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -103,7 +103,7 @@ template size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept { UTF8Decoder decoder; - int isEscapeSequence = false; + bool is_escape_sequence = false; size_t width = 0; size_t rollback = 0; for (size_t i = 0; i < size; ++i) @@ -116,10 +116,8 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l while (i + 15 < size) { - if (isEscapeSequence) - { + if (is_escape_sequence) break; - } __m128i bytes = _mm_loadu_si128(reinterpret_cast(&data[i])); @@ -145,17 +143,15 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l while (i < size && isPrintableASCII(data[i])) { - auto isParameterByte = isCSIParameterByte(data[i]); - auto isIntermediateByte = isCSIIntermediateByte(data[i]); - auto ignore_width = isEscapeSequence && (isParameterByte || isIntermediateByte); + bool ignore_width = is_escape_sequence && (isCSIParameterByte(data[i]) || isCSIIntermediateByte(data[i])); - if (ignore_width || (data[i] == '[' && isEscapeSequence)) + if (ignore_width || (data[i] == '[' && is_escape_sequence)) { /// don't count the width } - else if (isEscapeSequence && isCSIFinalByte(data[i])) + else if (is_escape_sequence && isCSIFinalByte(data[i])) { - isEscapeSequence = false; + is_escape_sequence = false; } else { From a13bf252683670c5db4ce4eb62ab19008e463a52 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 5 Jun 2024 10:26:56 +0200 Subject: [PATCH 19/44] Trigger CI From 5f3bc4271f6a0fe87a3cd2b9d1e694a88639ef2a Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 5 Jun 2024 10:58:30 +0200 Subject: [PATCH 20/44] rename forgoten isEscapeSequence to is_escape_sequence --- src/Common/UTF8Helpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/UTF8Helpers.cpp b/src/Common/UTF8Helpers.cpp index bfa860af98a..dd24cb20933 100644 --- a/src/Common/UTF8Helpers.cpp +++ b/src/Common/UTF8Helpers.cpp @@ -184,7 +184,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l // special treatment for '\t' and for ESC size_t next_width = width; if (decoder.codepoint == '\x1b') - isEscapeSequence = true; + is_escape_sequence = true; else if (decoder.codepoint == '\t') next_width += 8 - (prefix + width) % 8; else From 69d23f5e67a13b07b6b29e8c54c9f6e29f86fb9c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 29 Dec 2023 15:02:11 +0100 Subject: [PATCH 21/44] Fix all problems in tests that had been found by flake8 Signed-off-by: Azat Khuzhin --- tests/integration/helpers/hdfs_api.py | 3 +-- tests/integration/test_backup_restore_new/test.py | 2 +- .../test_disallow_concurrency.py | 4 ++-- .../test_convert_ordinary.py | 2 +- .../test_backward_compatibility/test_functions.py | 2 +- .../integration/test_disk_over_web_server/test.py | 4 ++-- tests/integration/test_jbod_balancer/test.py | 2 +- tests/integration/test_jdbc_bridge/test.py | 8 ++++---- .../test_keeper_snapshot_small_distance/test.py | 2 +- tests/integration/test_keeper_snapshots/test.py | 1 - .../test_keeper_three_nodes_start/test.py | 1 - .../test_merge_tree_azure_blob_storage/test.py | 5 +---- .../test.py | 6 
++---- tests/integration/test_scheduler/test.py | 1 + tests/integration/test_storage_hudi/test.py | 2 +- tests/integration/test_storage_iceberg/test.py | 2 +- tests/integration/test_storage_rabbitmq/test.py | 14 ++++++-------- tests/integration/test_ttl_move/test.py | 2 +- tests/integration/test_ttl_replicated/test.py | 2 +- 19 files changed, 28 insertions(+), 37 deletions(-) diff --git a/tests/integration/helpers/hdfs_api.py b/tests/integration/helpers/hdfs_api.py index 5739496cb50..4e4468fef77 100644 --- a/tests/integration/helpers/hdfs_api.py +++ b/tests/integration/helpers/hdfs_api.py @@ -110,10 +110,9 @@ class HDFSApi(object): logging.debug( "Stdout:\n{}\n".format(res.stdout.decode("utf-8")) ) - logging.debug("Env:\n{}\n".format(env)) raise Exception( "Command {} return non-zero code {}: {}".format( - args, res.returncode, res.stderr.decode("utf-8") + cmd, res.returncode, res.stderr.decode("utf-8") ) ) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index ef9e536976b..68b8d29f42e 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1474,7 +1474,7 @@ def test_backup_all(exclude_system_log_tables): restore_settings = [] if not exclude_system_log_tables: restore_settings.append("allow_non_empty_tables=true") - restore_command = f"RESTORE ALL FROM {backup_name} {'SETTINGS '+ ', '.join(restore_settings) if restore_settings else ''}" + restore_command = f"RESTORE ALL FROM {backup_name} {'SETTINGS ' + ', '.join(restore_settings) if restore_settings else ''}" session_id = new_session_id() instance.http_query( diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index c9f20333654..cd0f2032559 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -161,13 +161,13 @@ def wait_for_fail_restore(node, restore_id): elif status == "RESTORING": assert_eq_with_retry( node, - f"SELECT status FROM system.backups WHERE id = '{backup_id}'", + f"SELECT status FROM system.backups WHERE id = '{restore_id}'", "RESTORE_FAILED", sleep_time=2, retry_count=50, ) error = node.query( - f"SELECT error FROM system.backups WHERE id == '{backup_id}'" + f"SELECT error FROM system.backups WHERE id == '{restore_id}'" ).rstrip("\n") assert re.search( "Cannot restore the table default.tbl because it already contains some data", diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index b8db4e005a4..f5d0c066600 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -187,7 +187,7 @@ def check_convert_all_dbs_to_atomic(): # 6 tables, MVs contain 2 rows (inner tables does not match regexp) assert "8\t{}\n".format(8 * len("atomic")) == node.query( - "SELECT count(), sum(n) FROM atomic.merge".format(db) + "SELECT count(), sum(n) FROM atomic.merge" ) node.query("DETACH TABLE ordinary.detached PERMANENTLY") diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 1cf5c3deb81..758dda655da 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py 
+++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -89,7 +89,7 @@ def test_aggregate_states(start_cluster): logging.info("Skipping %s", aggregate_function) skipped += 1 continue - logging.exception("Failed %s", function) + logging.exception("Failed %s", aggregate_function) failed += 1 continue diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 9f43ab73fa3..f4ea7d54571 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -116,7 +116,7 @@ def test_usage(cluster, node_name): (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; """.format( - i, uuids[i], i, i + i, uuids[i] ) ) @@ -338,7 +338,7 @@ def test_page_cache(cluster): (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; """.format( - i, uuids[i], i, i + i, uuids[i] ) ) diff --git a/tests/integration/test_jbod_balancer/test.py b/tests/integration/test_jbod_balancer/test.py index 69ab83283ff..8635f5e612a 100644 --- a/tests/integration/test_jbod_balancer/test.py +++ b/tests/integration/test_jbod_balancer/test.py @@ -90,7 +90,7 @@ def wait_until_fully_merged(node, table): except: return - raise Exception(f"There are still merges on-going after {retry} assignments") + raise Exception(f"There are still merges on-going after {i} assignments") def test_jbod_balanced_merge(start_cluster): diff --git a/tests/integration/test_jdbc_bridge/test.py b/tests/integration/test_jdbc_bridge/test.py index c4a0a525df3..1efd868e4a7 100644 --- a/tests/integration/test_jdbc_bridge/test.py +++ b/tests/integration/test_jdbc_bridge/test.py @@ -91,7 +91,7 @@ def test_jdbc_insert(started_cluster): """ CREATE TABLE test.test_insert ENGINE = Memory AS SELECT * FROM test.ClickHouseTable; - SELECT * + SELECT * FROM jdbc('{0}?mutation', 'INSERT INTO test.test_insert VALUES({1}, ''{1}'', ''{1}'')'); """.format( datasource, records @@ -115,7 +115,7 @@ def test_jdbc_update(started_cluster): """ CREATE TABLE test.test_update ENGINE = Memory AS SELECT * FROM test.ClickHouseTable; - SELECT * + SELECT * FROM jdbc( '{}?mutation', 'SET mutations_sync = 1; ALTER TABLE test.test_update UPDATE Str=''{}'' WHERE Num = {} - 1;' @@ -145,7 +145,7 @@ def test_jdbc_delete(started_cluster): """ CREATE TABLE test.test_delete ENGINE = Memory AS SELECT * FROM test.ClickHouseTable; - SELECT * + SELECT * FROM jdbc( '{}?mutation', 'SET mutations_sync = 1; ALTER TABLE test.test_delete DELETE WHERE Num < {} - 1;' @@ -158,7 +158,7 @@ def test_jdbc_delete(started_cluster): expected = records - 1 actual = instance.query( "SELECT Str FROM jdbc('{}', 'SELECT * FROM test.test_delete')".format( - datasource, records + datasource ) ) assert int(actual) == expected, "expecting {} but got {}".format(expected, actual) diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index be8bf1bd245..612c5b3c65d 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -##!/usr/bin/env python3 + import pytest from helpers.cluster import ClickHouseCluster import helpers.keeper_utils as keeper_utils diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 6dfb2078559..951970dba23 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ 
b/tests/integration/test_keeper_snapshots/test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -#!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster import helpers.keeper_utils as keeper_utils diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py index bc93a6089cb..6576d386fcb 100644 --- a/tests/integration/test_keeper_three_nodes_start/test.py +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -#!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster import random diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 7f77627e793..45ae88f427e 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -537,10 +537,7 @@ def test_freeze_unfreeze(cluster): def test_apply_new_settings(cluster): node = cluster.instances[NODE_NAME] create_table(node, TABLE_NAME) - config_path = os.path.join( - SCRIPT_DIR, - "./_gen/disk_storage_conf.xml".format(cluster.instances_dir_name), - ) + config_path = os.path.join(SCRIPT_DIR, "./_gen/disk_storage_conf.xml") azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}" diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index f04425d83d4..0e87cb0e690 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -179,9 +179,7 @@ def test_different_data_types(started_cluster): for i in range(10): col = random.choice(["a", "b", "c"]) cursor.execute("UPDATE test_data_types SET {} = {};".format(col, i)) - cursor.execute( - """UPDATE test_data_types SET i = '2020-12-12';""".format(col, i) - ) + cursor.execute("UPDATE test_data_types SET i = '2020-12-12';") check_tables_are_synchronized(instance, "test_data_types", "id") @@ -452,7 +450,7 @@ def test_many_concurrent_queries(started_cluster): # also change primary key value print("try update primary key {}".format(thread_id)) cursor.execute( - "UPDATE {table}_{} SET key=key%100000+100000*{} WHERE key%{}=0".format( + "UPDATE {} SET key=key%100000+100000*{} WHERE key%{}=0".format( table_name, i + 1, i + 1 ) ) diff --git a/tests/integration/test_scheduler/test.py b/tests/integration/test_scheduler/test.py index e6def99c076..8e37bd8d403 100644 --- a/tests/integration/test_scheduler/test.py +++ b/tests/integration/test_scheduler/test.py @@ -6,6 +6,7 @@ import time import threading import pytest +from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index 6fe7a193129..0c3fbfb3cda 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -4,7 +4,7 @@ import os import json import helpers.client -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance from helpers.test_tools import TSV from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_contents diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py 
index d9dee0541b0..7762d17b96f 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -1,5 +1,5 @@ import helpers.client -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, ClickHouseInstance from helpers.test_tools import TSV import pyspark diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 23a95d5dd71..3240039ee81 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -702,7 +702,7 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): assert ( int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) + ), "ClickHouse lost some messages: {}".format(result1) assert int(result2) == 10 @@ -1516,7 +1516,7 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): assert ( int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) + ), "ClickHouse lost some messages: {}".format(result1) assert int(result2) == 4 * num_tables @@ -1966,7 +1966,7 @@ def test_rabbitmq_many_consumers_to_each_queue(rabbitmq_cluster): assert ( int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result) + ), "ClickHouse lost some messages: {}".format(result1) # 4 tables, 2 consumers for each table => 8 consumer tags assert int(result2) == 8 @@ -2427,9 +2427,7 @@ def test_rabbitmq_drop_table_properly(rabbitmq_cluster): time.sleep(30) try: - exists = channel.queue_declare( - callback, queue="rabbit_queue_drop", passive=True - ) + exists = channel.queue_declare(queue="rabbit_queue_drop", passive=True) except Exception as e: exists = False @@ -3364,7 +3362,7 @@ def test_rabbitmq_flush_by_block_size(rabbitmq_cluster): routing_key="", body=json.dumps({"key": 0, "value": 0}), ) - except e: + except Exception as e: logging.debug(f"Got error: {str(e)}") produce_thread = threading.Thread(target=produce) @@ -3442,7 +3440,7 @@ def test_rabbitmq_flush_by_time(rabbitmq_cluster): ) logging.debug("Produced a message") time.sleep(0.8) - except e: + except Exception as e: logging.debug(f"Got error: {str(e)}") produce_thread = threading.Thread(target=produce) diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 94432b89ab6..3b79ea7916d 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1850,7 +1850,7 @@ class TestCancelBackgroundMoving: config = inspect.cleandoc( f""" - { 256 * 1024 } + {256 * 1024} """ ) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index f944adbea41..538322473ee 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -325,7 +325,7 @@ def optimize_with_retry(node, table_name, retry=20): settings={"optimize_throw_if_noop": "1"}, ) break - except e: + except: time.sleep(0.5) From a474816fc744088ae0c300971de5043a5c054c72 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 29 Dec 2023 16:18:03 +0100 Subject: [PATCH 22/44] Add missing botocore import into clickhouse_backupview.py Signed-off-by: Azat Khuzhin --- utils/backupview/clickhouse_backupview.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/backupview/clickhouse_backupview.py b/utils/backupview/clickhouse_backupview.py index 4ba1f391d02..d1331e2ab49 100755 --- 
a/utils/backupview/clickhouse_backupview.py +++ b/utils/backupview/clickhouse_backupview.py @@ -8,6 +8,7 @@ import shutil import zipfile # For reading backups from zip archives import boto3 # For reading backups from S3 +import botocore ## Examples: From b2535d7f508c189c9fcbf871c3b60ac722afdaf7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 Jun 2024 09:50:39 +0200 Subject: [PATCH 23/44] Fix invalid escape sequence warnings Signed-off-by: Azat Khuzhin --- tests/integration/helpers/uclient.py | 4 ++-- tests/integration/test_prometheus_endpoint/test.py | 2 +- .../0_stateless/01056_window_view_proc_hop_watch.py | 6 +++--- .../01059_window_view_event_hop_watch_strict_asc.py | 8 ++++---- .../01062_window_view_event_hop_watch_asc.py | 6 +++--- .../01065_window_view_event_hop_watch_bounded.py | 4 ++-- .../0_stateless/01069_window_view_proc_tumble_watch.py | 8 ++++---- .../0_stateless/01070_window_view_watch_events.py | 6 +++--- .../0_stateless/01078_window_view_alter_query_watch.py | 10 +++++----- .../0_stateless/01082_window_view_watch_limit.py | 4 ++-- tests/queries/0_stateless/01921_test_progress_bar.py | 4 ++-- tests/queries/0_stateless/02473_infile_progress.py | 4 ++-- tests/queries/0_stateless/helpers/client.py | 4 ++-- tests/queries/0_stateless/helpers/shell.py | 2 +- 14 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/integration/helpers/uclient.py b/tests/integration/helpers/uclient.py index 45c8b8f64e2..195eb52ffeb 100644 --- a/tests/integration/helpers/uclient.py +++ b/tests/integration/helpers/uclient.py @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.join(CURDIR)) from . import uexpect -prompt = ":\) " +prompt = ":\\) " end_of_block = r".*\r\n.*\r\n" @@ -21,7 +21,7 @@ class client(object): self.client.eol("\r") self.client.logger(log, prefix=name) self.client.timeout(20) - self.client.expect("[#\$] ", timeout=2) + self.client.expect("[#\\$] ", timeout=2) self.client.send(command) def __enter__(self): diff --git a/tests/integration/test_prometheus_endpoint/test.py b/tests/integration/test_prometheus_endpoint/test.py index f140ebdfbe7..c1f04497b55 100644 --- a/tests/integration/test_prometheus_endpoint/test.py +++ b/tests/integration/test_prometheus_endpoint/test.py @@ -28,7 +28,7 @@ def parse_response_line(line): if line.startswith("#"): return {} - match = re.match("^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? -?(\d)", line) + match = re.match(r"^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? 
-?(\d)", line) assert match, line name, _, val = match.groups() return {name: int(val)} diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 2db14fcdddf..e65650816ab 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -49,16 +49,16 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01056_window_view_proc_hop_watch.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 2323ee5c838..3dbb176b0dc 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -47,7 +47,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH db_01059_event_hop_watch_strict_asc.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO db_01059_event_hop_watch_strict_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -57,7 +57,7 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1*1990-01-01 12:00:02" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client2.send( "INSERT INTO db_01059_event_hop_watch_strict_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));" @@ -65,11 +65,11 @@ with client(name="client1>", log=log) as client1, client( client2.expect("Ok.") client1.expect("1*1990-01-01 12:00:06" + end_of_block) client1.expect("1*1990-01-01 12:00:08" + end_of_block) - client1.expect("Progress: 3.00 rows.*\)") + client1.expect("Progress: 3.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index db9e8cef6c5..d6cc3ee1a88 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -49,7 +49,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01062_window_view_event_hop_watch_asc.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01062_window_view_event_hop_watch_asc.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -69,11 +69,11 @@ with client(name="client1>", log=log) as 
client1, client( client2.expect(prompt) client1.expect("1" + end_of_block) client1.expect("2" + end_of_block) - client1.expect("Progress: 3.00 rows.*\)") + client1.expect("Progress: 3.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index b8d5ff02d37..e5f9ab59f60 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -50,7 +50,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01065_window_view_event_hop_watch_bounded.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01065_window_view_event_hop_watch_bounded.mt VALUES (1, '1990/01/01 12:00:00');" ) @@ -72,7 +72,7 @@ with client(name="client1>", log=log) as client1, client( # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index 21c2e831afc..8c3a46992dc 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -49,23 +49,23 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01069_window_view_proc_tumble_watch.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client2.send( "INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)" ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 2.00 rows.*\)") + client1.expect("Progress: 2.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index 1cf7678a014..172a82a29da 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -49,7 +49,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01070_window_view_watch_events.wv EVENTS") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01070_window_view_watch_events.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -59,11 +59,11 @@ with client(name="client1>", log=log) as client1, client( ) 
client2.expect("Ok.") client1.expect("1990-01-01 12:00:05" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 3f3dfe0cda8..05aeb1b4ccb 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -55,7 +55,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01078_window_view_alter_query_watch.wv") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));" ) @@ -65,7 +65,7 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client2.send( "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid" ) @@ -75,7 +75,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client3.send("WATCH 01078_window_view_alter_query_watch.wv") client3.expect("Query id" + end_of_block) - client3.expect("Progress: 0.00 rows.*\)") + client3.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" ) @@ -85,11 +85,11 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client3.expect("2" + end_of_block) - client3.expect("Progress: 1.00 rows.*\)") + client3.expect("Progress: 1.00 rows.*\\)") # send Ctrl-C client3.send("\x03", eol="") - match = client3.expect("(%s)|([#\$] )" % prompt) + match = client3.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client3.send(client3.command) client3.expect(prompt) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 9938ebcab98..5dcdfdb5020 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -49,7 +49,7 @@ with client(name="client1>", log=log) as client1, client( client1.send("WATCH 01082_window_view_watch_limit.wv LIMIT 1") client1.expect("Query id" + end_of_block) - client1.expect("Progress: 0.00 rows.*\)") + client1.expect("Progress: 0.00 rows.*\\)") client2.send( "INSERT INTO 01082_window_view_watch_limit.mt VALUES (1, '1990/01/01 12:00:00');" ) @@ -59,7 +59,7 @@ with client(name="client1>", log=log) as client1, client( ) client2.expect("Ok.") client1.expect("1" + end_of_block) - client1.expect("Progress: 1.00 rows.*\)") + client1.expect("Progress: 1.00 rows.*\\)") client1.expect("1 row" + end_of_block) client1.expect(prompt) diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 
54c7ae59894..6406534a647 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -15,6 +15,6 @@ log = None with client(name="client1>", log=log) as client1: client1.expect(prompt) client1.send("SELECT number FROM numbers(1000) FORMAT Null") - client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) - client1.expect("0 rows in set. Elapsed: [\\w]{1}\.[\\w]{3} sec.") + client1.expect("Progress: 1\\.00 thousand rows, 8\\.00 KB .*" + end_of_block) + client1.expect("0 rows in set. Elapsed: [\\w]{1}\\.[\\w]{3} sec.") client1.expect("Peak memory usage: .*B" + end_of_block) diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 9941736107f..4165eeb6d31 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -32,12 +32,12 @@ with client( ) client1.expect(prompt) client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'") - client1.expect("Progress: 5.00 rows, 10.00 B.*\)") + client1.expect("Progress: 5.00 rows, 10.00 B.*\\)") client1.expect(prompt) # send Ctrl-C client1.send("\x03", eol="") - match = client1.expect("(%s)|([#\$] )" % prompt) + match = client1.expect("(%s)|([#\\$] )" % prompt) if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) diff --git a/tests/queries/0_stateless/helpers/client.py b/tests/queries/0_stateless/helpers/client.py index 5c8589dfca1..ac0896f2e93 100644 --- a/tests/queries/0_stateless/helpers/client.py +++ b/tests/queries/0_stateless/helpers/client.py @@ -8,7 +8,7 @@ sys.path.insert(0, os.path.join(CURDIR)) import uexpect -prompt = ":\) " +prompt = ":\\) " end_of_block = r".*\r\n.*\r\n" @@ -21,7 +21,7 @@ class client(object): self.client.eol("\r") self.client.logger(log, prefix=name) self.client.timeout(120) - self.client.expect("[#\$] ", timeout=60) + self.client.expect("[#\\$] ", timeout=60) self.client.send(command) def __enter__(self): diff --git a/tests/queries/0_stateless/helpers/shell.py b/tests/queries/0_stateless/helpers/shell.py index befb3dcd543..c3fff61ffc9 100644 --- a/tests/queries/0_stateless/helpers/shell.py +++ b/tests/queries/0_stateless/helpers/shell.py @@ -10,7 +10,7 @@ import uexpect class shell(object): - def __init__(self, command=None, name="", log=None, prompt="[#\$] "): + def __init__(self, command=None, name="", log=None, prompt="[#\\$] "): if command is None: command = ["/bin/bash", "--noediting"] self.prompt = prompt From 11905682a9facddcde8296309e97dedee5479afb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 29 Dec 2023 14:51:24 +0100 Subject: [PATCH 24/44] Check python code with flake8 Recently assert-on-tuple had been introduced in tests [1], let's prevent this. 
[1]: https://github.com/ClickHouse/ClickHouse/pull/56367#discussion_r1437098533 v2: pin flake8 to 4.0.1 (instead of originally 6.1) due to other dependencies, hope that it will find such errors Signed-off-by: Azat Khuzhin --- docker/test/style/Dockerfile | 1 + docker/test/style/run.sh | 2 + docs/en/development/continuous-integration.md | 3 + utils/check-style/check-flake8 | 55 +++++++++++++++++++ .../check-style/process_style_check_result.py | 1 + 5 files changed, 62 insertions(+) create mode 100755 utils/check-style/check-flake8 diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index cb29185f068..91768c8328d 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -30,6 +30,7 @@ RUN pip3 install \ mypy==1.8.0 \ pylint==3.1.0 \ python-magic==0.4.24 \ + flake8==4.0.1 \ requests \ thefuzz \ types-requests \ diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index cc6cb292b66..64803191532 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -9,6 +9,8 @@ echo "Check style" | ts ./check-style -n |& tee /test_output/style_output.txt echo "Check python formatting with black" | ts ./check-black -n |& tee /test_output/black_output.txt +echo "Check python with flake8" | ts +./check-flake8 |& tee /test_output/flake8_output.txt echo "Check python type hinting with mypy" | ts ./check-mypy -n |& tee /test_output/mypy_output.txt echo "Check typos" | ts diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index c348eb5ca07..c283cfbf4c2 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -91,6 +91,9 @@ cd ./utils/check-style # Check python type hinting with mypy ./check-mypy +# Check python with flake8 +./check-flake8 + # Check code with codespell ./check-typos diff --git a/utils/check-style/check-flake8 b/utils/check-style/check-flake8 new file mode 100755 index 00000000000..58dd8a99d40 --- /dev/null +++ b/utils/check-style/check-flake8 @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +function join_by() { local IFS="$1"; shift; echo "$*"; } + +set -e + +# We check only our code, that's why we skip contrib +GIT_ROOT=$(git rev-parse --show-cdup) +GIT_ROOT=${GIT_ROOT:-./} + +# Find all *.py, *.python files and executable files without extension +# that are determined as python scripts by 'file' util +# in the repo except the contrib directory. +find_cmd=( + find "$GIT_ROOT" -type f -not -path "${GIT_ROOT}contrib/*" + \( + \( + -name '*.py' -or -name "*.python" -or + \( + -executable -not -name "*.*" -exec sh -c 'file {} | grep -q "Python script"' \; + \) + \) + # We skip modules generated by the protocol buffer compiler from *.proto files. 
+ -and -not -name '*_pb2.py' -and -not -name '*_pb2_grpc.py' + \) -print0 +) + +ignores=( + E101 # Indentation contains mixed spaces and tabs + E203 # Whitespace before ':' + E226 # missing whitespace around arithmetic operator + E266 # Too many leading '#' for block comment + E401 # Multiple imports on one line + E402 # Module level import not at top of file + E501 # line too long + E711 # Comparison to None should be 'cond is None:' + E712 # Comparison to true should be 'if cond is true:' or 'if cond:' + E713 # Test for membership should be 'not in' + E714 # Test for object identity should be 'is not' + E722 # Do not use bare except, specify exception instead + E731 # Do not assign a lambda expression, use a def + E741 # Do not use variables named 'I', 'O', or 'l' + F401 # Module imported but unused + F403 # 'from module import *' used; unable to detect undefined names + F405 # Name may be undefined, or defined from star imports: module + F522 # .format(...) unused named arguments + F541 # f-string without any placeholders + F811 # redefinition of unused name from line N + F841 # local variable name is assigned to but never used + W191 # Indentation contains tabs + W291 # Trailing whitespace + W293 # Blank line contains whitespace + W503 # Line break occurred before a binary operator +) +"${find_cmd[@]}" | xargs -0 flake8 --ignore "$(join_by , "${ignores[@]}")" diff --git a/utils/check-style/process_style_check_result.py b/utils/check-style/process_style_check_result.py index e603084732d..2c349114a59 100755 --- a/utils/check-style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -18,6 +18,7 @@ def process_result(result_folder): "style", "pylint", "black", + "flake8", "mypy", "typos", "whitespaces", From f9c243064f886c0d0260d43787f1a630d911aa74 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jun 2024 04:20:20 +0200 Subject: [PATCH 25/44] A tiny fix for fancy quotes --- src/Parsers/Lexer.cpp | 3 --- .../0_stateless/03167_fancy_quotes_off_by_one.reference | 1 + tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference create mode 100644 tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 5f2bd50524c..b4601389696 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -59,9 +59,6 @@ Token quotedStringWithUnicodeQuotes(const char *& pos, const char * const token_ pos = find_first_symbols<'\xE2'>(pos, end); if (pos + 2 >= end) return Token(error_token, token_begin, end); - /// Empty identifiers are not allowed, while empty strings are. 
- if (success_token == TokenType::QuotedIdentifier && pos + 3 >= end) - return Token(error_token, token_begin, end); if (pos[0] == '\xE2' && pos[1] == '\x80' && pos[2] == expected_end_byte) { diff --git a/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql new file mode 100644 index 00000000000..6f563d8f2a1 --- /dev/null +++ b/tests/queries/0_stateless/03167_fancy_quotes_off_by_one.sql @@ -0,0 +1 @@ +SELECT ‘test’ AS “column” \ No newline at end of file From fd930971301edfc6f5f199744354ab4f5005beb7 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 29 May 2024 04:10:38 +0000 Subject: [PATCH 26/44] Fix writing ORC statistics for unsigned types --- contrib/orc | 2 +- .../Impl/NativeORCBlockInputFormat.cpp | 7 +++- .../Formats/Impl/ORCBlockOutputFormat.cpp | 12 +++--- .../0_stateless/02892_orc_filter_pushdown.sql | 2 +- .../03164_orc_signedness.reference | 41 +++++++++++++++++++ .../0_stateless/03164_orc_signedness.sql | 40 ++++++++++++++++++ 6 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/03164_orc_signedness.reference create mode 100644 tests/queries/0_stateless/03164_orc_signedness.sql diff --git a/contrib/orc b/contrib/orc index e24f2c2a3ca..947cebaf943 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad +Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 0b55f633c6a..dcd5a531b05 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -269,7 +269,12 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy case orc::SHORT: case orc::INT: case orc::LONG: { - /// May throw exception + /// May throw exception. + /// + /// In particular, it'll throw if we request the column as unsigned, like this: + /// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10 + /// We have to reject this, otherwise it would miss values > 127 (because + /// they're treated as negative by ORC). auto val = field.get(); return orc::Literal(val); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 1e36c100667..6f543a05fba 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -315,18 +315,20 @@ void ORCBlockOutputFormat::writeColumn( if (null_bytemap) orc_column.hasNulls = true; + /// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to + /// make the ORC library calculate min and max correctly. switch (type->getTypeId()) { case TypeIndex::Enum8: [[fallthrough]]; case TypeIndex::Int8: { /// Note: Explicit cast to avoid clang-tidy error: 'signed char' to 'long' conversion; consider casting to 'unsigned char' first. 
- writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return static_cast(value); }); + writeNumbers(orc_column, column, null_bytemap, [](const Int8 & value){ return Int64(Int8(value)); }); break; } case TypeIndex::UInt8: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return value; }); + writeNumbers(orc_column, column, null_bytemap, [](const UInt8 & value){ return Int64(Int8(value)); }); break; } case TypeIndex::Enum16: [[fallthrough]]; @@ -338,7 +340,7 @@ void ORCBlockOutputFormat::writeColumn( case TypeIndex::Date: [[fallthrough]]; case TypeIndex::UInt16: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return value; }); + writeNumbers(orc_column, column, null_bytemap, [](const UInt16 & value){ return Int64(Int16(value)); }); break; } case TypeIndex::Date32: [[fallthrough]]; @@ -349,12 +351,12 @@ void ORCBlockOutputFormat::writeColumn( } case TypeIndex::UInt32: { - writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return value; }); + writeNumbers(orc_column, column, null_bytemap, [](const UInt32 & value){ return Int64(Int32(value)); }); break; } case TypeIndex::IPv4: { - writeNumbers(orc_column, column, null_bytemap, [](const IPv4 & value){ return value.toUnderType(); }); + writeNumbers(orc_column, column, null_bytemap, [](const IPv4 & value){ return Int64(Int32(value.toUnderType())); }); break; } case TypeIndex::Int64: diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql index f9aa7696ac6..f1d1ba12570 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest, no-parallel, no-cpu-aarch64 +-- Tags: no-fasttest, no-parallel set output_format_orc_string_as_string = 1; set output_format_orc_row_index_stride = 100; diff --git a/tests/queries/0_stateless/03164_orc_signedness.reference b/tests/queries/0_stateless/03164_orc_signedness.reference new file mode 100644 index 00000000000..3ee822a94c1 --- /dev/null +++ b/tests/queries/0_stateless/03164_orc_signedness.reference @@ -0,0 +1,41 @@ +-- { echoOn } +select x from file('i8.orc') where indexHint(x = -128); +-128 +select x from file('i8.orc') where indexHint(x = 128); +select x from file('u8.orc') where indexHint(x = -128); +-128 +select x from file('u8.orc') where indexHint(x = 128); +select x from file('i16.orc') where indexHint(x = -32768); +-32768 +select x from file('i16.orc') where indexHint(x = 32768); +select x from file('u16.orc') where indexHint(x = -32768); +-32768 +select x from file('u16.orc') where indexHint(x = 32768); +select x from file('i32.orc') where indexHint(x = -2147483648); +-2147483648 +select x from file('i32.orc') where indexHint(x = 2147483648); +select x from file('u32.orc') where indexHint(x = -2147483648); +-2147483648 +select x from file('u32.orc') where indexHint(x = 2147483648); +select x from file('i64.orc') where indexHint(x = -9223372036854775808); +-9223372036854775808 +select x from file('i64.orc') where indexHint(x = 9223372036854775808); +-9223372036854775808 +select x from file('u64.orc') where indexHint(x = -9223372036854775808); +-9223372036854775808 +select x from file('u64.orc') where indexHint(x = 9223372036854775808); +-9223372036854775808 +select x from file('u8.orc', ORC, 'x UInt8') where indexHint(x > 10); +128 +select x from file('u8.orc', ORC, 'x UInt64') where indexHint(x > 10); +18446744073709551488 +select x 
from file('u16.orc', ORC, 'x UInt16') where indexHint(x > 10); +32768 +select x from file('u16.orc', ORC, 'x UInt64') where indexHint(x > 10); +18446744073709518848 +select x from file('u32.orc', ORC, 'x UInt32') where indexHint(x > 10); +2147483648 +select x from file('u32.orc', ORC, 'x UInt64') where indexHint(x > 10); +18446744071562067968 +select x from file('u64.orc', ORC, 'x UInt64') where indexHint(x > 10); +9223372036854775808 diff --git a/tests/queries/0_stateless/03164_orc_signedness.sql b/tests/queries/0_stateless/03164_orc_signedness.sql new file mode 100644 index 00000000000..ced99c7dca7 --- /dev/null +++ b/tests/queries/0_stateless/03164_orc_signedness.sql @@ -0,0 +1,40 @@ +set input_format_orc_filter_push_down = 1; +set engine_file_truncate_on_insert = 1; + +insert into function file('i8.orc') select materialize(-128)::Int8 as x; +insert into function file('u8.orc') select materialize(128)::UInt8 as x; +insert into function file('i16.orc') select materialize(-32768)::Int16 as x; +insert into function file('u16.orc') select materialize(32768)::UInt16 as x; +insert into function file('i32.orc') select materialize(-2147483648)::Int32 as x; +insert into function file('u32.orc') select materialize(2147483648)::UInt32 as x; +insert into function file('i64.orc') select materialize(-9223372036854775808)::Int64 as x; +insert into function file('u64.orc') select materialize(9223372036854775808)::UInt64 as x; + +-- { echoOn } +select x from file('i8.orc') where indexHint(x = -128); +select x from file('i8.orc') where indexHint(x = 128); +select x from file('u8.orc') where indexHint(x = -128); +select x from file('u8.orc') where indexHint(x = 128); + +select x from file('i16.orc') where indexHint(x = -32768); +select x from file('i16.orc') where indexHint(x = 32768); +select x from file('u16.orc') where indexHint(x = -32768); +select x from file('u16.orc') where indexHint(x = 32768); + +select x from file('i32.orc') where indexHint(x = -2147483648); +select x from file('i32.orc') where indexHint(x = 2147483648); +select x from file('u32.orc') where indexHint(x = -2147483648); +select x from file('u32.orc') where indexHint(x = 2147483648); + +select x from file('i64.orc') where indexHint(x = -9223372036854775808); +select x from file('i64.orc') where indexHint(x = 9223372036854775808); +select x from file('u64.orc') where indexHint(x = -9223372036854775808); +select x from file('u64.orc') where indexHint(x = 9223372036854775808); + +select x from file('u8.orc', ORC, 'x UInt8') where indexHint(x > 10); +select x from file('u8.orc', ORC, 'x UInt64') where indexHint(x > 10); +select x from file('u16.orc', ORC, 'x UInt16') where indexHint(x > 10); +select x from file('u16.orc', ORC, 'x UInt64') where indexHint(x > 10); +select x from file('u32.orc', ORC, 'x UInt32') where indexHint(x > 10); +select x from file('u32.orc', ORC, 'x UInt64') where indexHint(x > 10); +select x from file('u64.orc', ORC, 'x UInt64') where indexHint(x > 10); From b300af350349b5bbefaa4036eed3d7c5d5a102d8 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 29 May 2024 05:00:03 +0000 Subject: [PATCH 27/44] no-fasttest --- tests/queries/0_stateless/03164_orc_signedness.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03164_orc_signedness.sql b/tests/queries/0_stateless/03164_orc_signedness.sql index ced99c7dca7..ae2d0428ca5 100644 --- a/tests/queries/0_stateless/03164_orc_signedness.sql +++ b/tests/queries/0_stateless/03164_orc_signedness.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest, 
no-parallel + set input_format_orc_filter_push_down = 1; set engine_file_truncate_on_insert = 1; From 40a3708c8f139c28f72e10f916c45a21ad235e28 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 31 May 2024 19:55:13 +0000 Subject: [PATCH 28/44] Fix test --- .../02892_orc_filter_pushdown.reference | 46 +++++++++---------- .../0_stateless/02892_orc_filter_pushdown.sql | 34 ++++++-------- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.reference b/tests/queries/0_stateless/02892_orc_filter_pushdown.reference index 9059b403a34..e6c2e9b2b57 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.reference +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.reference @@ -1,8 +1,4 @@ number Nullable(Int64) -u8 Nullable(Int8) -u16 Nullable(Int16) -u32 Nullable(Int32) -u64 Nullable(Int64) i8 Nullable(Int8) i16 Nullable(Int16) i32 Nullable(Int32) @@ -22,34 +18,34 @@ d64 Nullable(Decimal(18, 10)) d128 Nullable(Decimal(38, 20)) -- Go over all types individually -- { echoOn } -select count(), sum(number) from file('02892.orc') where indexHint(u8 in (10, 15, 250)); -800 4229600 -select count(1), min(u8), max(u8) from file('02892.orc') where u8 in (10, 15, 250); -66 10 15 +select count(), sum(number) from file('02892.orc') where indexHint(i8 in (10, 15, -6)); +1100 5744450 +select count(1), min(i8), max(i8) from file('02892.orc') where i8 in (10, 15, -6); +99 -6 15 select count(), sum(number) from file('02892.orc') where indexHint(i8 between -3 and 2); 1000 4999500 select count(1), min(i8), max(i8) from file('02892.orc') where i8 between -3 and 2; 208 -3 2 -select count(), sum(number) from file('02892.orc') where indexHint(u16 between 4000 and 61000 or u16 == 42); -1800 6479100 -select count(1), min(u16), max(u16) from file('02892.orc') where u16 between 4000 and 61000 or u16 == 42; +select count(), sum(number) from file('02892.orc') where indexHint(i16 between 4000 and 61000 or i16 == 42); +1200 1099400 +select count(1), min(i16), max(i16) from file('02892.orc') where i16 between 4000 and 61000 or i16 == 42; 1002 42 5000 select count(), sum(number) from file('02892.orc') where indexHint(i16 between -150 and 250); 500 2474750 select count(1), min(i16), max(i16) from file('02892.orc') where i16 between -150 and 250; 401 -150 250 -select count(), sum(number) from file('02892.orc') where indexHint(u32 in (42, 4294966296)); -200 999900 -select count(1), min(u32), max(u32) from file('02892.orc') where u32 in (42, 4294966296); -1 42 42 +select count(), sum(number) from file('02892.orc') where indexHint(i32 in (42, -1000)); +200 1099900 +select count(1), min(i32), max(i32) from file('02892.orc') where i32 in (42, -1000); +2 -1000 42 select count(), sum(number) from file('02892.orc') where indexHint(i32 between -150 and 250); 500 2474750 select count(1), min(i32), max(i32) from file('02892.orc') where i32 between -150 and 250; 401 -150 250 -select count(), sum(number) from file('02892.orc') where indexHint(u64 in (42, 18446744073709550616)); -100 494950 -select count(1), min(u64), max(u64) from file('02892.orc') where u64 in (42, 18446744073709550616); -1 42 42 +select count(), sum(number) from file('02892.orc') where indexHint(i64 in (42, -1000)); +200 1099900 +select count(1), min(i64), max(i64) from file('02892.orc') where i64 in (42, -1000); +2 -1000 42 select count(), sum(number) from file('02892.orc') where indexHint(i64 between -150 and 250); 500 2474750 select count(1), min(i64), max(i64) from file('02892.orc') 
where i64 between -150 and 250; @@ -111,21 +107,21 @@ select count(), sum(number) from file('02892.orc') where indexHint(0); 0 \N select count(), min(number), max(number) from file('02892.orc') where indexHint(0); 0 \N \N -select count(), sum(number) from file('02892.orc') where indexHint(s like '99%' or u64 == 2000); +select count(), sum(number) from file('02892.orc') where indexHint(s like '99%' or i64 == 2000); 300 1204850 -select count(), min(s), max(s) from file('02892.orc') where (s like '99%' or u64 == 2000); +select count(), min(s), max(s) from file('02892.orc') where (s like '99%' or i64 == 2000); 12 2000 999 select count(), sum(number) from file('02892.orc') where indexHint(s like 'z%'); 0 \N select count(), min(s), max(s) from file('02892.orc') where (s like 'z%'); 0 \N \N -select count(), sum(number) from file('02892.orc') where indexHint(u8 == 10 or 1 == 1); +select count(), sum(number) from file('02892.orc') where indexHint(i8 == 10 or 1 == 1); 10000 49995000 -select count(), min(u8), max(u8) from file('02892.orc') where (u8 == 10 or 1 == 1); +select count(), min(i8), max(i8) from file('02892.orc') where (i8 == 10 or 1 == 1); 10000 -128 127 -select count(), sum(number) from file('02892.orc') where indexHint(u8 < 0); +select count(), sum(number) from file('02892.orc') where indexHint(i8 < 0); 5300 26042350 -select count(), min(u8), max(u8) from file('02892.orc') where (u8 < 0); +select count(), min(i8), max(i8) from file('02892.orc') where (i8 < 0); 5001 -128 -1 -- { echoOn } select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null is NULL); diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql index f1d1ba12570..e3736de6a17 100644 --- a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql @@ -16,15 +16,9 @@ SET session_timezone = 'UTC'; -- Try all the types. insert into function file('02892.orc') - -- Use negative numbers to test sign extension for signed types and lack of sign extension for - -- unsigned types. 
with 5000 - number as n select number, - intDiv(n, 11)::UInt8 as u8, - n::UInt16 u16, - n::UInt32 as u32, - n::UInt64 as u64, intDiv(n, 11)::Int8 as i8, n::Int16 i16, n::Int32 as i32, @@ -50,26 +44,26 @@ desc file('02892.orc'); -- Go over all types individually -- { echoOn } -select count(), sum(number) from file('02892.orc') where indexHint(u8 in (10, 15, 250)); -select count(1), min(u8), max(u8) from file('02892.orc') where u8 in (10, 15, 250); +select count(), sum(number) from file('02892.orc') where indexHint(i8 in (10, 15, -6)); +select count(1), min(i8), max(i8) from file('02892.orc') where i8 in (10, 15, -6); select count(), sum(number) from file('02892.orc') where indexHint(i8 between -3 and 2); select count(1), min(i8), max(i8) from file('02892.orc') where i8 between -3 and 2; -select count(), sum(number) from file('02892.orc') where indexHint(u16 between 4000 and 61000 or u16 == 42); -select count(1), min(u16), max(u16) from file('02892.orc') where u16 between 4000 and 61000 or u16 == 42; +select count(), sum(number) from file('02892.orc') where indexHint(i16 between 4000 and 61000 or i16 == 42); +select count(1), min(i16), max(i16) from file('02892.orc') where i16 between 4000 and 61000 or i16 == 42; select count(), sum(number) from file('02892.orc') where indexHint(i16 between -150 and 250); select count(1), min(i16), max(i16) from file('02892.orc') where i16 between -150 and 250; -select count(), sum(number) from file('02892.orc') where indexHint(u32 in (42, 4294966296)); -select count(1), min(u32), max(u32) from file('02892.orc') where u32 in (42, 4294966296); +select count(), sum(number) from file('02892.orc') where indexHint(i32 in (42, -1000)); +select count(1), min(i32), max(i32) from file('02892.orc') where i32 in (42, -1000); select count(), sum(number) from file('02892.orc') where indexHint(i32 between -150 and 250); select count(1), min(i32), max(i32) from file('02892.orc') where i32 between -150 and 250; -select count(), sum(number) from file('02892.orc') where indexHint(u64 in (42, 18446744073709550616)); -select count(1), min(u64), max(u64) from file('02892.orc') where u64 in (42, 18446744073709550616); +select count(), sum(number) from file('02892.orc') where indexHint(i64 in (42, -1000)); +select count(1), min(i64), max(i64) from file('02892.orc') where i64 in (42, -1000); select count(), sum(number) from file('02892.orc') where indexHint(i64 between -150 and 250); select count(1), min(i64), max(i64) from file('02892.orc') where i64 between -150 and 250; @@ -117,17 +111,17 @@ select count(1), min(d128), max(128) from file('02892.orc') where (d128 between select count(), sum(number) from file('02892.orc') where indexHint(0); select count(), min(number), max(number) from file('02892.orc') where indexHint(0); -select count(), sum(number) from file('02892.orc') where indexHint(s like '99%' or u64 == 2000); -select count(), min(s), max(s) from file('02892.orc') where (s like '99%' or u64 == 2000); +select count(), sum(number) from file('02892.orc') where indexHint(s like '99%' or i64 == 2000); +select count(), min(s), max(s) from file('02892.orc') where (s like '99%' or i64 == 2000); select count(), sum(number) from file('02892.orc') where indexHint(s like 'z%'); select count(), min(s), max(s) from file('02892.orc') where (s like 'z%'); -select count(), sum(number) from file('02892.orc') where indexHint(u8 == 10 or 1 == 1); -select count(), min(u8), max(u8) from file('02892.orc') where (u8 == 10 or 1 == 1); +select count(), sum(number) from file('02892.orc') where 
indexHint(i8 == 10 or 1 == 1); +select count(), min(i8), max(i8) from file('02892.orc') where (i8 == 10 or 1 == 1); -select count(), sum(number) from file('02892.orc') where indexHint(u8 < 0); -select count(), min(u8), max(u8) from file('02892.orc') where (u8 < 0); +select count(), sum(number) from file('02892.orc') where indexHint(i8 < 0); +select count(), min(i8), max(i8) from file('02892.orc') where (i8 < 0); -- { echoOff } -- Nullable and LowCardinality. From f9ac18d74a80fe35e24baa8f896be7e891280888 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Jun 2024 08:16:11 +0000 Subject: [PATCH 29/44] better description for history of a setting changes --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 8b157517263..b47b3a02466 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -95,7 +95,7 @@ static std::map sett {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - {"min_untracked_memory", 4_MiB, 4_KiB, "A new setting."}, + {"min_untracked_memory", 4_MiB, 4_KiB, "A new setting to enable more accurate memory tracking."}, }}, {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, From 45fda3fd3990b8047290af1b226d857cb47608ed Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Jun 2024 08:20:52 +0000 Subject: [PATCH 30/44] use Mi suffix to make things obvious --- tests/integration/test_failed_async_inserts/test.py | 2 +- tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_failed_async_inserts/test.py b/tests/integration/test_failed_async_inserts/test.py index 2bb56b250ea..e7e504e565f 100644 --- a/tests/integration/test_failed_async_inserts/test.py +++ b/tests/integration/test_failed_async_inserts/test.py @@ -45,7 +45,7 @@ def test_failed_async_inserts(started_cluster): ignore_error=True, ) - select_query = "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery' SETTINGS min_untracked_memory = 4194304" + select_query = "SELECT value FROM system.events WHERE event == 'FailedAsyncInsertQuery' SETTINGS min_untracked_memory = '4Mi'" assert node.query(select_query) == "4\n" diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index 92ef928bc2f..de84846c1d7 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -8,7 +8,7 @@ -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 SET use_uncompressed_cache = 0; -SET min_untracked_memory = 4194304; -- 4MiB +SET min_untracked_memory = '4Mi'; -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; From b9edf204d9bf3b37072f3f2c6051fcc7fd286cfa Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Jun 2024 08:51:22 +0000 Subject: [PATCH 31/44] better --- tests/integration/test_settings_constraints_distributed/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index a1f44af1069..51541721a29 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -137,7 +137,7 @@ def test_select_clamps_settings(): assert ( distributed.query( - query, settings={"max_memory_usage": 1, "min_untracked_memory": 4194304} + query, settings={"max_memory_usage": 1, "min_untracked_memory": 4 * 1024 * 1024} ) == "node1\tmax_memory_usage\t11111111\n" "node1\treadonly\t0\n" From 74897790aa146ff814817912c600734c70990895 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 6 Jun 2024 09:00:35 +0000 Subject: [PATCH 32/44] Automatic style fix --- .../integration/test_settings_constraints_distributed/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index 51541721a29..d29b66b43bb 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -137,7 +137,8 @@ def test_select_clamps_settings(): assert ( distributed.query( - query, settings={"max_memory_usage": 1, "min_untracked_memory": 4 * 1024 * 1024} + query, + settings={"max_memory_usage": 1, "min_untracked_memory": 4 * 1024 * 1024}, ) == "node1\tmax_memory_usage\t11111111\n" "node1\treadonly\t0\n" From ba40f7a754c038152d66b0627ebe208029856f4a Mon Sep 17 00:00:00 2001 From: Alexander 
Tokmakov Date: Thu, 6 Jun 2024 13:20:31 +0200 Subject: [PATCH 33/44] Update 03165_string_functions_with_token_text_indexes.sql --- .../03165_string_functions_with_token_text_indexes.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql index a0cb8a35169..fee30af0245 100644 --- a/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql +++ b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql @@ -2,6 +2,8 @@ SELECT '-------- Bloom filter --------'; SELECT ''; DROP TABLE IF EXISTS 03165_token_bf; +SET allow_experimental_full_text_index=1; + CREATE TABLE 03165_token_bf ( id Int64, From 2a5f9c941c331d06fc487bbb1dda423d6b2b370c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 6 Jun 2024 13:47:13 +0200 Subject: [PATCH 34/44] document to/fromUnixTimestampXYZ functions --- .../functions/type-conversion-functions.md | 197 +++++++++++++++--- 1 file changed, 169 insertions(+), 28 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 5dd1d5ceebe..60bdab22a58 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -2423,11 +2423,7 @@ Result: ## toUnixTimestamp64Milli -## toUnixTimestamp64Micro - -## toUnixTimestamp64Nano - -Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. +Converts a `DateTime64` to a `Int64` value with fixed millisecond precision. The input value is scaled up or down appropriately depending on its precision. :::note The output value is a timestamp in UTC, not in the timezone of `DateTime64`. @@ -2437,24 +2433,22 @@ The output value is a timestamp in UTC, not in the timezone of `DateTime64`. ```sql toUnixTimestamp64Milli(value) -toUnixTimestamp64Micro(value) -toUnixTimestamp64Nano(value) ``` **Arguments** -- `value` — DateTime64 value with any precision. +- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md). **Returned value** -- `value` converted to the `Int64` data type. +- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md). -**Examples** +**Example** Query: ```sql -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +WITH toDateTime64('2009-02-13 23:31:31.011', 3, 'UTC') AS dt64 SELECT toUnixTimestamp64Milli(dt64); ``` @@ -2462,14 +2456,77 @@ Result: ```response ┌─toUnixTimestamp64Milli(dt64)─┐ -│ 1568650812345 │ +│ 1234567891011 │ └──────────────────────────────┘ ``` +## toUnixTimestamp64Micro + +Converts a `DateTime64` to a `Int64` value with fixed microsecond precision. The input value is scaled up or down appropriately depending on its precision. + +:::note +The output value is a timestamp in UTC, not in the timezone of `DateTime64`. +::: + +**Syntax** + +```sql +toUnixTimestamp64Micro(value) +``` + +**Arguments** + +- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md). 
+ +**Example** + Query: -``` sql -WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +```sql +WITH toDateTime64('1970-01-15 06:56:07.891011', 6, 'UTC') AS dt64 +SELECT toUnixTimestamp64Micro(dt64); +``` + +Result: + +```response +┌─toUnixTimestamp64Micro(dt64)─┐ +│ 1234567891011 │ +└──────────────────────────────┘ +``` + +## toUnixTimestamp64Nano + +Converts a `DateTime64` to a `Int64` value with fixed nano precision. The input value is scaled up or down appropriately depending on its precision. + +:::note +The output value is a timestamp in UTC, not in the timezone of `DateTime64`. +::: + +**Syntax** + +```sql +toUnixTimestamp64Nano(value) +``` + +**Arguments** + +- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +WITH toDateTime64('1970-01-01 00:20:34.567891011', 9, 'UTC') AS dt64 SELECT toUnixTimestamp64Nano(dt64); ``` @@ -2477,34 +2534,32 @@ Result: ```response ┌─toUnixTimestamp64Nano(dt64)─┐ -│ 1568650812345678000 │ +│ 1234567891011 │ └─────────────────────────────┘ ``` ## fromUnixTimestamp64Milli -## fromUnixTimestamp64Micro +Converts an `Int64` to a `DateTime64` value with fixed millisecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision. -## fromUnixTimestamp64Nano - -Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on it’s precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone. +:::note +Please note that input value is treated as a UTC timestamp, not timestamp at the given (or implicit) timezone. +::: **Syntax** ``` sql fromUnixTimestamp64Milli(value[, timezone]) -fromUnixTimestamp64Micro(value[, timezone]) -fromUnixTimestamp64Nano(value[, timezone]) ``` **Arguments** -- `value` — `Int64` value with any precision. -- `timezone` — `String` (optional) timezone name of the result. +- `value` — value with any precision. [Int64](../data-types/int-uint.md). +- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md). **Returned value** -- `value` converted to the `DateTime64` data type. +- `value` converted to DateTime64 with precision `3`. [DateTime64](../data-types/datetime64.md). **Example** @@ -2512,15 +2567,101 @@ Query: ``` sql WITH CAST(1234567891011, 'Int64') AS i64 -SELECT fromUnixTimestamp64Milli(i64, 'UTC'); +SELECT + fromUnixTimestamp64Milli(i64, 'UTC') AS x, + toTypeName(x); ``` Result: ```response -┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ -│ 2009-02-13 23:31:31.011 │ -└──────────────────────────────────────┘ +┌───────────────────────x─┬─toTypeName(x)────────┐ +│ 2009-02-13 23:31:31.011 │ DateTime64(3, 'UTC') │ +└─────────────────────────┴──────────────────────┘ +``` + +## fromUnixTimestamp64Micro + +Converts an `Int64` to a `DateTime64` value with fixed microsecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision. + +:::note +Please note that input value is treated as a UTC timestamp, not timestamp at the given (or implicit) timezone. +::: + +**Syntax** + +``` sql +fromUnixTimestamp64Micro(value[, timezone]) +``` + +**Arguments** + +- `value` — value with any precision. [Int64](../data-types/int-uint.md). +- `timezone` — (optional) timezone name of the result. 
[String](../data-types/string.md). + +**Returned value** + +- `value` converted to DateTime64 with precision `6`. [DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +``` sql +WITH CAST(1234567891011, 'Int64') AS i64 +SELECT + fromUnixTimestamp64Micro(i64, 'UTC') AS x, + toTypeName(x); +``` + +Result: + +```response +┌──────────────────────────x─┬─toTypeName(x)────────┐ +│ 1970-01-15 06:56:07.891011 │ DateTime64(6, 'UTC') │ +└────────────────────────────┴──────────────────────┘ +``` + +## fromUnixTimestamp64Nano + +Converts an `Int64` to a `DateTime64` value with fixed nanosecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision. + +:::note +Please note that input value is treated as a UTC timestamp, not timestamp at the given (or implicit) timezone. +::: + +**Syntax** + +``` sql +fromUnixTimestamp64Nano(value[, timezone]) +``` + +**Arguments** + +- `value` — value with any precision. [Int64](../data-types/int-uint.md). +- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md). + +**Returned value** + +- `value` converted to DateTime64 with precision `9`. [DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +``` sql +WITH CAST(1234567891011, 'Int64') AS i64 +SELECT + fromUnixTimestamp64Nano(i64, 'UTC') AS x, + toTypeName(x); +``` + +Result: + +```response +┌─────────────────────────────x─┬─toTypeName(x)────────┐ +│ 1970-01-01 00:20:34.567891011 │ DateTime64(9, 'UTC') │ +└───────────────────────────────┴──────────────────────┘ ``` ## formatRow From f05a6577232e7061d13fe5888fb3da07cbfdfe39 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Jun 2024 12:56:34 +0000 Subject: [PATCH 35/44] add docs --- docs/en/operations/settings/settings.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ffaf53085c4..ada922cb037 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3172,7 +3172,7 @@ Default value: `0`. ## lightweight_deletes_sync {#lightweight_deletes_sync} -The same as 'mutation_sync', but controls only execution of lightweight deletes. +The same as 'mutation_sync', but controls only execution of lightweight deletes. Possible values: @@ -4616,6 +4616,16 @@ Read more about [memory overcommit](memory-overcommit.md). Default value: `1GiB`. +## max_untracked_memory {#max_untracked_memory} +Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'. + +Default value: `4MiB`. + +## min_untracked_memory {#min_untracked_memory} +Lower bound for untracked memory limit which is applied to threads with low memory consumption. Untracked memory limit equals thread memory usage devided by 16 and clamped between `min_untracked_memory` and `max_untracked_memory` for every thread. It guarantees that total untracked memory does not exceed 10% of current memory footprint even with a lot of small threads. To disable dynamic limit for untracked memory set value `4MiB`. + +Default value: `4KiB`. + ## Schema Inference settings See [schema inference](../../interfaces/schema-inference.md#schema-inference-modes) documentation for more details. 
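For reference, a minimal usage sketch of the `min_untracked_memory` setting documented above (not part of the patch itself; it mirrors the `SET ... = '4Mi'` form already used in `01017_uniqCombined_memory_usage.sql` earlier in this series, and the value shown is illustrative only):

```sql
-- Per-thread untracked limit = clamp(thread_memory_usage / 16,
--                                     min_untracked_memory, max_untracked_memory).
-- Raising the lower bound back to 4 MiB effectively disables the dynamic limit
-- and restores the previous fixed-threshold behaviour.
SET min_untracked_memory = '4Mi';
```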
From 03458a516afa0f81623fd1e11fa7586d89fa7aab Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:06:16 +0200 Subject: [PATCH 36/44] Fix typo --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 60bdab22a58..2ec51d43c59 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -2501,7 +2501,7 @@ Result: ## toUnixTimestamp64Nano -Converts a `DateTime64` to a `Int64` value with fixed nano precision. The input value is scaled up or down appropriately depending on its precision. +Converts a `DateTime64` to a `Int64` value with fixed nanosecond precision. The input value is scaled up or down appropriately depending on its precision. :::note The output value is a timestamp in UTC, not in the timezone of `DateTime64`. From afc63af264c1ae2cd523485d833912f0dd5090ff Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 6 Jun 2024 15:22:31 +0200 Subject: [PATCH 37/44] Update `largestTriangleThreeBuckets` doc --- .../reference/largestTriangleThreeBuckets.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md index 06443994dd9..4f73aadb8da 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md +++ b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md @@ -24,6 +24,8 @@ Alias: `lttb`. - `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md). - `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md). +NaNs are ignored in the provided series, meaning that any NaN values will be excluded from the analysis. This ensures that the function operates only on valid numerical data. + **Parameters** - `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md). 
@@ -61,7 +63,7 @@ Result: ``` text ┌────────largestTriangleThreeBuckets(4)(x, y)───────────┐ -│ [(1,10),(3,15),(5,40),(10,70)] │ +│ [(1,10),(3,15),(9,55),(10,70)] │ └───────────────────────────────────────────────────────┘ ``` From 05592fb5ba97dd86a744b146d15e9a1cb0422357 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 6 Jun 2024 14:27:23 +0000 Subject: [PATCH 38/44] additional log for cleanupDetachedTables --- src/Databases/DatabaseAtomic.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 8edc5b737a6..ccab72cfbae 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -1,20 +1,21 @@ +#include #include +#include #include #include -#include +#include #include #include -#include +#include +#include +#include +#include #include +#include +#include "Common/logger_useful.h" #include #include #include -#include -#include -#include -#include -#include -#include namespace fs = std::filesystem; @@ -393,6 +394,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() { DetachedTables not_in_use; auto it = detached_tables.begin(); + LOG_DEBUG(log, "There are {} detached tables. Start searching non used tables.", detached_tables.size()); while (it != detached_tables.end()) { if (it->second.unique()) @@ -403,6 +405,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() else ++it; } + LOG_DEBUG(log, "Found {} non used tables in detached tables.", not_in_use.size()); /// It should be destroyed in caller with released database mutex return not_in_use; } From dd9b15daf5accedc0e850e4d12b2ebc88b24bd86 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 6 Jun 2024 23:43:14 +0800 Subject: [PATCH 39/44] Fix tupleConcat of two empty tuples --- src/Functions/tupleConcat.cpp | 5 ++++- tests/queries/0_stateless/03167_empty_tuple_concat.reference | 1 + tests/queries/0_stateless/03167_empty_tuple_concat.sql | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03167_empty_tuple_concat.reference create mode 100644 tests/queries/0_stateless/03167_empty_tuple_concat.sql diff --git a/src/Functions/tupleConcat.cpp b/src/Functions/tupleConcat.cpp index c48e4d61463..c9cdae10bcf 100644 --- a/src/Functions/tupleConcat.cpp +++ b/src/Functions/tupleConcat.cpp @@ -61,7 +61,7 @@ public: return std::make_shared(tuple_arg_types); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const size_t num_arguments = arguments.size(); Columns columns; @@ -92,6 +92,9 @@ public: columns.push_back(inner_col); } + if (columns.empty()) + return ColumnTuple::create(input_rows_count); + return ColumnTuple::create(columns); } }; diff --git a/tests/queries/0_stateless/03167_empty_tuple_concat.reference b/tests/queries/0_stateless/03167_empty_tuple_concat.reference new file mode 100644 index 00000000000..6a452c185a8 --- /dev/null +++ b/tests/queries/0_stateless/03167_empty_tuple_concat.reference @@ -0,0 +1 @@ +() diff --git a/tests/queries/0_stateless/03167_empty_tuple_concat.sql b/tests/queries/0_stateless/03167_empty_tuple_concat.sql new file mode 100644 index 00000000000..f6fce86f332 --- /dev/null +++ b/tests/queries/0_stateless/03167_empty_tuple_concat.sql @@ -0,0 +1 @@ +SELECT ()||(); From 
2c193a793d197ddf459a0ba0461d5ae908c4db89 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Jun 2024 16:17:36 +0000 Subject: [PATCH 40/44] typo --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ada922cb037..b3e9da816ab 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4622,7 +4622,7 @@ Small allocations and deallocations are grouped in thread local variable and tra Default value: `4MiB`. ## min_untracked_memory {#min_untracked_memory} -Lower bound for untracked memory limit which is applied to threads with low memory consumption. Untracked memory limit equals thread memory usage devided by 16 and clamped between `min_untracked_memory` and `max_untracked_memory` for every thread. It guarantees that total untracked memory does not exceed 10% of current memory footprint even with a lot of small threads. To disable dynamic limit for untracked memory set value `4MiB`. +Lower bound for untracked memory limit which is applied to threads with low memory consumption. Untracked memory limit equals thread memory usage divided by 16 and clamped between `min_untracked_memory` and `max_untracked_memory` for every thread. It guarantees that total untracked memory does not exceed 10% of current memory footprint even with a lot of small threads. To disable dynamic limit for untracked memory set value `4MiB`. Default value: `4KiB`. From eb72c12b31560dad49caff2e532472e8920f38d5 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 7 Jun 2024 00:51:37 +0200 Subject: [PATCH 41/44] CI: Minor fixes in ci scripts --- tests/ci/ci_settings.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index 62e7826dac5..7807cc7ac10 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -211,12 +211,15 @@ class CiSettings: ): res[job] = job_config + add_parents = [] for job in list(res): parent_jobs = CI_CONFIG.get_job_parents(job) for parent_job in parent_jobs: if parent_job not in res: + add_parents.append(parent_job) print(f"Job [{job}] requires [{parent_job}] - add") - res[parent_job] = job_configs[parent_job] + for job in add_parents: + res[job] = job_configs[job] for job, job_config in res.items(): batches = [] From 0deb862c93824146cf9012f95fa247e459c3683d Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 7 Jun 2024 01:22:47 +0200 Subject: [PATCH 42/44] Re-enable Fast test in MQ --- tests/ci/ci.py | 6 +++++- tests/ci/ci_settings.py | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 55a18a2f335..ec6e84dea8c 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -442,7 +442,11 @@ def _configure_jobs( # filter jobs in accordance with ci settings job_configs = ci_settings.apply( - job_configs, pr_info.is_release, is_pr=pr_info.is_pr, labels=pr_info.labels + job_configs, + pr_info.is_release, + is_pr=pr_info.is_pr, + is_mq=pr_info.is_merge_queue, + labels=pr_info.labels, ) # check jobs in ci cache diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index 7807cc7ac10..83d4ddb4211 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -134,6 +134,7 @@ class CiSettings: job_config: JobConfig, is_release: bool, is_pr: bool, + is_mq: bool, labels: Iterable[str], ) -> bool: # type: ignore #too-many-return-statements if self.do_not_test: @@ -189,7 +190,7 @@ class CiSettings: if 
job_config.release_only and not is_release: return False - elif job_config.pr_only and not is_pr: + elif job_config.pr_only and not is_pr and not is_mq: return False return not to_deny @@ -199,6 +200,7 @@ class CiSettings: job_configs: Dict[str, JobConfig], is_release: bool, is_pr: bool, + is_mq: bool, labels: Iterable[str], ) -> Dict[str, JobConfig]: """ @@ -207,7 +209,7 @@ class CiSettings: res = {} for job, job_config in job_configs.items(): if self._check_if_selected( - job, job_config, is_release=is_release, is_pr=is_pr, labels=labels + job, job_config, is_release=is_release, is_pr=is_pr, is_mq=is_mq, labels=labels ): res[job] = job_config From 8f26f77505a08197f236f6e2cc069cd4111d71ec Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 7 Jun 2024 01:35:39 +0200 Subject: [PATCH 43/44] fix unit test --- tests/ci/test_ci_options.py | 58 +++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index c00cd0b9216..e6fa0389649 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -167,19 +167,19 @@ class TestCIOptions(unittest.TestCase): ) jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} - jobs_configs[ - "fuzzers" - ].run_by_label = ( + jobs_configs["fuzzers"].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs[ - "Integration tests (asan)" - ].release_only = ( + jobs_configs["Integration tests (asan)"].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( ci_options.apply( - jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + jobs_configs, + is_release=False, + is_pr=True, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( @@ -212,7 +212,9 @@ class TestCIOptions(unittest.TestCase): jobs_configs["fuzzers"].run_by_label = "TEST_LABEL" # no settings are set filtered_jobs = list( - CiSettings().apply(jobs_configs, is_release=False, is_pr=True, labels=[]) + CiSettings().apply( + jobs_configs, is_release=False, is_pr=False, is_mq=True, labels=[] + ) ) self.assertCountEqual( filtered_jobs, @@ -220,9 +222,21 @@ class TestCIOptions(unittest.TestCase): "Fast test", ], ) - filtered_jobs = list( - CiSettings().apply(jobs_configs, is_release=True, is_pr=False, labels=[]) + CiSettings().apply( + jobs_configs, is_release=False, is_pr=True, is_mq=False, labels=[] + ) + ) + self.assertCountEqual( + filtered_jobs, + [ + "Fast test", + ], + ) + filtered_jobs = list( + CiSettings().apply( + jobs_configs, is_release=True, is_pr=False, is_mq=False, labels=[] + ) ) self.assertCountEqual( filtered_jobs, @@ -240,7 +254,11 @@ class TestCIOptions(unittest.TestCase): # no settings are set filtered_jobs = list( ci_settings.apply( - jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + jobs_configs, + is_release=False, + is_pr=True, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( @@ -253,7 +271,11 @@ class TestCIOptions(unittest.TestCase): ci_settings.include_keywords = ["Fast"] filtered_jobs = list( ci_settings.apply( - jobs_configs, is_release=True, is_pr=False, labels=["TEST_LABEL"] + jobs_configs, + is_release=True, + is_pr=False, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( @@ -271,13 +293,17 @@ class TestCIOptions(unittest.TestCase): self.assertCountEqual(ci_options.include_keywords, ["analyzer"]) self.assertIsNone(ci_options.exclude_keywords) jobs_configs = {job: JobConfig() for job in 
_TEST_JOB_LIST} - jobs_configs[ - "fuzzers" - ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appear in the result + jobs_configs["fuzzers"].run_by_label = ( + "TEST_LABEL" # check "fuzzers" does not appear in the result + ) jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( - jobs_configs, is_release=False, is_pr=True, labels=["TEST_LABEL"] + jobs_configs, + is_release=False, + is_pr=True, + is_mq=False, + labels=["TEST_LABEL"], ) ) self.assertCountEqual( From 367d41e7f042137d4a25b55c740ba3835b5d5435 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 6 Jun 2024 23:41:25 +0000 Subject: [PATCH 44/44] Automatic style fix --- tests/ci/ci_settings.py | 7 ++++++- tests/ci/test_ci_options.py | 14 +++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index 83d4ddb4211..f25344c7701 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -209,7 +209,12 @@ class CiSettings: res = {} for job, job_config in job_configs.items(): if self._check_if_selected( - job, job_config, is_release=is_release, is_pr=is_pr, is_mq=is_mq, labels=labels + job, + job_config, + is_release=is_release, + is_pr=is_pr, + is_mq=is_mq, + labels=labels, ): res[job] = job_config diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index e6fa0389649..60888932803 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -167,10 +167,14 @@ class TestCIOptions(unittest.TestCase): ) jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} - jobs_configs["fuzzers"].run_by_label = ( + jobs_configs[ + "fuzzers" + ].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs["Integration tests (asan)"].release_only = ( + jobs_configs[ + "Integration tests (asan)" + ].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -293,9 +297,9 @@ class TestCIOptions(unittest.TestCase): self.assertCountEqual(ci_options.include_keywords, ["analyzer"]) self.assertIsNone(ci_options.exclude_keywords) jobs_configs = {job: JobConfig() for job in _TEST_JOB_LIST} - jobs_configs["fuzzers"].run_by_label = ( - "TEST_LABEL" # check "fuzzers" does not appear in the result - ) + jobs_configs[ + "fuzzers" + ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appear in the result jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply(
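Editor's note: the `is_mq` gating that [PATCH 42/44] threads through `CiSettings.apply()` and `_check_if_selected()` can be hard to read from the hunks alone, so here is a minimal, self-contained Python sketch of just that selection rule. `JobConfig` and `is_job_selected` below are simplified stand-ins for illustration only, not the real classes in `tests/ci/ci_settings.py`; the actual code also handles labels, include/exclude keywords, and batches.

```python
from dataclasses import dataclass


@dataclass
class JobConfig:
    """Simplified stand-in with only the flags used by this sketch."""

    release_only: bool = False
    pr_only: bool = False


def is_job_selected(cfg: JobConfig, is_release: bool, is_pr: bool, is_mq: bool) -> bool:
    """Mirrors the release/PR/merge-queue gate shown in the PATCH 42 hunk."""
    if cfg.release_only and not is_release:
        return False
    # The PATCH 42 change: a pr_only job is no longer dropped for merge-queue runs.
    if cfg.pr_only and not is_pr and not is_mq:
        return False
    return True


# A pr_only job is selected for PR and merge-queue runs, but not for release runs.
pr_only_job = JobConfig(pr_only=True)
assert is_job_selected(pr_only_job, is_release=False, is_pr=True, is_mq=False)
assert is_job_selected(pr_only_job, is_release=False, is_pr=False, is_mq=True)
assert not is_job_selected(pr_only_job, is_release=True, is_pr=False, is_mq=False)
```

The second branch is the behaviour the "Re-enable Fast test in MQ" commit is after: with `is_mq=True`, a `pr_only` job now passes the gate even though `is_pr` is false, which is presumably why "Fast test" appears in the merge-queue expectations added to `test_ci_options.py`.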