From 4b72748b2a1466a3c4947a321a0d5b536888ce63 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 17 May 2024 11:47:45 +0000 Subject: [PATCH 1/5] Better handling illegal user input not to throw unexpected of CORRUPTED_DATA errors --- .../AggregateFunctionsArgMinArgMax.cpp | 4 ++-- src/Dictionaries/SSDCacheDictionaryStorage.h | 7 +++---- src/Interpreters/executeQuery.cpp | 11 ++++++++++- src/Server/HTTPHandler.cpp | 8 ++++---- .../02477_single_value_data_string_regression.sql | 8 ++++---- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp b/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp index e8f40120152..9608ca26f37 100644 --- a/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp +++ b/src/AggregateFunctions/AggregateFunctionsArgMinArgMax.cpp @@ -14,7 +14,7 @@ struct Settings; namespace ErrorCodes { -extern const int CORRUPTED_DATA; +extern const int INCORRECT_DATA; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; } @@ -198,7 +198,7 @@ public: this->data(place).value().read(buf, *serialization_val, arena); if (unlikely(this->data(place).value().has() != this->data(place).result().has())) throw Exception( - ErrorCodes::CORRUPTED_DATA, + ErrorCodes::INCORRECT_DATA, "Invalid state of the aggregate function {}: has_value ({}) != has_result ({})", getName(), this->data(place).value().has(), diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index e3eea71cd9a..f0b56cbf529 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -721,11 +721,10 @@ public: if (!block.checkCheckSum()) { std::string calculated_check_sum = std::to_string(block.calculateCheckSum()); - std::string check_sum = std::to_string(block.getCheckSum()); + std::string expected_check_sum = std::to_string(block.getCheckSum()); throw Exception(ErrorCodes::CORRUPTED_DATA, - "Cache data corrupted. Checksum validation failed. Calculated {} in block {}", - calculated_check_sum, - check_sum); + "Cache data corrupted. Checksum validation failed. Calculated {} expected in block {}, in file {}", + calculated_check_sum, expected_check_sum, file_path); } func(blocks_to_fetch[block_to_fetch_index], block.getBlockData()); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f1f72a4ea4a..cf0167bff17 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1421,7 +1421,16 @@ void executeQuery( const char * begin; const char * end; - istr.nextIfAtEnd(); + try + { + istr.nextIfAtEnd(); + } + catch (...) + { + /// If buffer contains invalid data and we failed to decompress, we still want to have some information about the query in the log. + logQuery("", context, /* internal = */ false, QueryProcessingStage::Complete); + throw; + } size_t max_query_size = context->getSettingsRef().max_query_size; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a677c537622..d1db4cb3951 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -707,11 +707,11 @@ void HTTPHandler::processQuery( /// The data can also be compressed using incompatible internal algorithm. This is indicated by /// 'decompress' query parameter. std::unique_ptr in_post_maybe_compressed; - bool in_post_compressed = false; + bool is_in_post_compressed = false; if (params.getParsed("decompress", false)) { - in_post_maybe_compressed = std::make_unique(*in_post); - in_post_compressed = true; + in_post_maybe_compressed = std::make_unique(*in_post, /* allow_different_codecs_ = */ false, /* external_data_ = */ true); + is_in_post_compressed = true; } else in_post_maybe_compressed = std::move(in_post); @@ -845,7 +845,7 @@ void HTTPHandler::processQuery( /// If 'http_native_compression_disable_checksumming_on_decompress' setting is turned on, /// checksums of client data compressed with internal algorithm are not checked. - if (in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) + if (is_in_post_compressed && settings.http_native_compression_disable_checksumming_on_decompress) static_cast(*in_post_maybe_compressed).disableChecksumming(); /// Add CORS header if 'add_http_cors_header' setting is turned on send * in Access-Control-Allow-Origin diff --git a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql index 0f11a06f3fc..8499786f47a 100644 --- a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql +++ b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql @@ -103,11 +103,11 @@ SELECT '2^30-1', maxMerge(x) from (select CAST(unhex('ffffff3f') || randomString SELECT '1M without 0', length(maxMerge(x)) from (select CAST(unhex('00001000') || randomString(0x00100000 - 1) || 'x', 'AggregateFunction(max, String)') as x); SELECT '1M with 0', length(maxMerge(x)) from (select CAST(unhex('00001000') || randomString(0x00100000 - 1) || '\0', 'AggregateFunction(max, String)') as x); -SELECT 'fuzz1', finalizeAggregation(CAST(unhex('3000000\0303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353600010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); -- { serverError CORRUPTED_DATA } +SELECT 'fuzz1', finalizeAggregation(CAST(unhex('3000000\0303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353600010000000000000000'), 'AggregateFunction(argMax, String, UInt64)')); -- { serverError INCORRECT_DATA } SELECT 'fuzz2', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '01' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } -SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } -SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError CORRUPTED_DATA } +SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError INCORRECT_DATA } +SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError INCORRECT_DATA } +SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError INCORRECT_DATA } drop table if exists aggr; From d1127bf119c9843050cc06093ea130feda6f6c5c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 17 May 2024 12:09:46 +0000 Subject: [PATCH 2/5] Fix final=1 for distributed over non-mt table. --- src/Analyzer/Passes/AutoFinalOnQueryPass.cpp | 2 +- src/Analyzer/QueryTreePassManager.cpp | 4 ++-- .../0_stateless/02420_final_setting_analyzer.reference | 4 ++++ tests/queries/0_stateless/02420_final_setting_analyzer.sql | 3 +++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 9bd044dd89c..70aa1a41548 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -42,7 +42,7 @@ private: return; const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); - bool is_final_supported = storage && storage->supportsFinal(); + bool is_final_supported = storage && !storage->isRemote() && storage->supportsFinal(); if (!is_final_supported) return; diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 51f1fb6cc2f..f7919b6422c 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -192,7 +192,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node) void QueryTreePassManager::runOnlyResolve(QueryTreeNodePtr query_tree_node) { // Run only QueryAnalysisPass and GroupingFunctionsResolvePass passes. - run(query_tree_node, 2); + run(query_tree_node, 3); } void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index) @@ -249,6 +249,7 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) { manager.addPass(std::make_unique(only_analyze)); manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); @@ -294,7 +295,6 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) manager.addPass(std::make_unique()); - manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.reference b/tests/queries/0_stateless/02420_final_setting_analyzer.reference index dd9fed65f13..780a6e5de68 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.reference +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.reference @@ -132,3 +132,7 @@ SELECT * FROM merge_table ORDER BY id, val; 2 a 2 b 3 c +select sum(number) from numbers(10) settings final=1; +45 +select sum(number) from remote('127.0.0.{1,2}', numbers(10)) settings final=1; +90 diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.sql b/tests/queries/0_stateless/02420_final_setting_analyzer.sql index 14c832cfaf5..cbdec017602 100644 --- a/tests/queries/0_stateless/02420_final_setting_analyzer.sql +++ b/tests/queries/0_stateless/02420_final_setting_analyzer.sql @@ -102,3 +102,6 @@ insert into table_to_merge_c values (3,'c'); -- expected output: -- 1 c, 2 a, 2 b, 3 c SELECT * FROM merge_table ORDER BY id, val; + +select sum(number) from numbers(10) settings final=1; +select sum(number) from remote('127.0.0.{1,2}', numbers(10)) settings final=1; From 2a29046d03fb89aca64432c105b84211260d71f3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 19:18:39 +0200 Subject: [PATCH 3/5] Revert "Exclude FunctionsConversion from the large objects check for now" This reverts commit 7261f924bb671ceb9d2131175d558df7296ff217. --- utils/check-style/check-large-objects.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index e2266e89556..2122cca911e 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -7,8 +7,6 @@ export LC_ALL=C # The "total" should be printed without localization TU_EXCLUDES=( AggregateFunctionUniq Aggregator - # FIXME: Exclude for now - FunctionsConversion ) if find $1 -name '*.o' | xargs wc -c | grep --regexp='\.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ From 89f26b56c659d3088288928353a4fbf612d4066b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 19:25:37 +0200 Subject: [PATCH 4/5] Fix stripping heavy debug symbols in functions v2: remove resolving realpath Signed-off-by: Azat Khuzhin --- src/Functions/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 751e8cf5103..c52b00150ec 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -31,7 +31,7 @@ extract_into_parent_list(clickhouse_functions_headers dbms_headers add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) if (OMIT_HEAVY_DEBUG_SYMBOLS) target_compile_options(clickhouse_functions_obj PRIVATE "-g0") - set_source_files_properties(${DBMS_FUNCTIONS} PROPERTIES COMPILE_FLAGS "-g0") + set_source_files_properties(${DBMS_FUNCTIONS} DIRECTORY .. PROPERTIES COMPILE_FLAGS "-g0") endif() list (APPEND OBJECT_LIBS $) From cad1f1a1112bae7735ecb5c858c13baf53d0fdfd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 22 May 2024 07:50:45 +0200 Subject: [PATCH 5/5] Tune cpu limit for loongarch64 Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d51c1b242f..96ba2961d3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,8 +68,9 @@ if (ENABLE_CHECK_HEAVY_BUILDS) set (RLIMIT_AS 20000000000) endif() - # For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file - if (ARCH_RISCV64) + # For some files currently building RISCV64/LOONGARCH64 might be too slow. + # TODO: Improve compilation times per file + if (ARCH_RISCV64 OR ARCH_LOONGARCH64) set (RLIMIT_CPU 1800) endif()