From bc16cc59fff3ccab70fd0ca54ece50e15f99e266 Mon Sep 17 00:00:00 2001 From: Bin Xie Date: Sat, 6 May 2023 11:09:45 +0800 Subject: [PATCH 001/151] If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. --- src/Dictionaries/DictionaryFactory.cpp | 23 +++++++++++++++++-- src/Dictionaries/DictionaryFactory.h | 4 +++- src/Dictionaries/FlatDictionary.cpp | 2 +- .../getDictionaryConfigurationFromAST.cpp | 6 +++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp index d091e49d1f0..c3102632167 100644 --- a/src/Dictionaries/DictionaryFactory.cpp +++ b/src/Dictionaries/DictionaryFactory.cpp @@ -17,13 +17,13 @@ namespace ErrorCodes extern const int UNKNOWN_ELEMENT_IN_CONFIG; } -void DictionaryFactory::registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex) +void DictionaryFactory::registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex, bool has_layout_complex) { auto it = registered_layouts.find(layout_type); if (it != registered_layouts.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryFactory: the layout name '{}' is not unique", layout_type); - RegisteredLayout layout { .layout_create_function = create_layout, .is_layout_complex = is_layout_complex }; + RegisteredLayout layout { .layout_create_function = create_layout, .is_layout_complex = is_layout_complex, .has_layout_complex = has_layout_complex }; registered_layouts.emplace(layout_type, std::move(layout)); } @@ -89,6 +89,25 @@ bool DictionaryFactory::isComplex(const std::string & layout_type) const return it->second.is_layout_complex; } +bool DictionaryFactory::convertToComplex(std::string & layout_type) const +{ + auto it = registered_layouts.find(layout_type); + + if (it == registered_layouts.end()) + { + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, + "Unknown dictionary layout type: {}", + layout_type); + } + + if (!it->second.is_layout_complex && it->second.has_layout_complex) + { + layout_type = "complex_key_" + layout_type; + return true; + } + return false; +} + DictionaryFactory & DictionaryFactory::instance() { diff --git a/src/Dictionaries/DictionaryFactory.h b/src/Dictionaries/DictionaryFactory.h index b1dad340f4b..0dc80af62fc 100644 --- a/src/Dictionaries/DictionaryFactory.h +++ b/src/Dictionaries/DictionaryFactory.h @@ -54,14 +54,16 @@ public: bool created_from_ddl)>; bool isComplex(const std::string & layout_type) const; + bool convertToComplex(std::string & layout_type) const; - void registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex); + void registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex, bool has_layout_complex = true); private: struct RegisteredLayout { LayoutCreateFunction layout_create_function; bool is_layout_complex; + bool has_layout_complex; }; using LayoutRegistry = std::unordered_map; diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index d3699a150c4..5bbb5a33fa9 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -683,7 +683,7 @@ void registerDictionaryFlat(DictionaryFactory & factory) return std::make_unique(dict_id, dict_struct, std::move(source_ptr), configuration); }; - factory.registerLayout("flat", create_layout, false); + factory.registerLayout("flat", 
create_layout, false, false); } diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 0de8b843604..6d3d08a323b 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -609,6 +609,12 @@ getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr conte bool complex = DictionaryFactory::instance().isComplex(dictionary_layout->layout_type); + if (pk_attrs.size() > 1 && !complex + && DictionaryFactory::instance().convertToComplex(dictionary_layout->layout_type)) + { + complex = true; + } + auto all_attr_names_and_types = buildDictionaryAttributesConfiguration( xml_document, structure_element, query.dictionary_attributes_list, pk_attrs); From d3c3d8b8e401d239416f323c69ceb12c67e3c26d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Mar 2023 16:48:40 +0100 Subject: [PATCH 002/151] Remove export of dynamic symbols --- CMakeLists.txt | 10 ++++++++-- programs/library-bridge/CMakeLists.txt | 4 ---- programs/odbc-bridge/CMakeLists.txt | 6 ------ 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 263b202049b..3283ca52ca7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,8 +155,14 @@ elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") endif () -# Make sure the final executable has symbols exported -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") +if (OS_LINUX) + # We should not export dynamic symbols, because: + # - The main clickhouse binary does not use dlopen, + # and whatever is poisoning it by LD_PRELOAD should not link to our symbols. + # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries + # should not expose their symbols to ODBC drivers and libraries. + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") +endif () if (OS_DARWIN) # The `-all_load` flag forces loading of all symbols from all libraries, diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 1cacc391ca5..dd0bf67cb64 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp ) -if (OS_LINUX) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) target_link_libraries(clickhouse-library-bridge PRIVATE diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 118610e4dcd..56373601b95 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES validateODBCConnectionString.cpp ) -if (OS_LINUX) - # clickhouse-odbc-bridge is always a separate binary. - # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. 
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE From 5a3281bb4912dce6a6125681b0804b97653da763 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Mar 2023 19:30:03 +0100 Subject: [PATCH 003/151] Remove unused code --- src/Common/getResource.cpp | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp index fe603fcc550..09777640dd9 100644 --- a/src/Common/getResource.cpp +++ b/src/Common/getResource.cpp @@ -1,6 +1,4 @@ #include "getResource.h" -#include -#include #include #include @@ -14,39 +12,6 @@ std::string_view getResource(std::string_view name) std::replace(name_replaced.begin(), name_replaced.end(), '.', '_'); boost::replace_all(name_replaced, "+", "_PLUS_"); -#if defined USE_MUSL /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. return DB::SymbolIndex::instance()->getResource(name_replaced); - -#else - // In most `dlsym(3)` APIs, one passes the symbol name as it appears via - // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be - // looked up with the string `"_foo"`. - // - // Apple's linker is confusingly different. The NOTES on the man page for - // `dlsym(3)` claim that one looks up the symbol with "the name used in C - // source code". In this example, that would mean using the string `"foo"`. - // This apparently applies even in the case where the symbol did not originate - // from C source, such as the embedded binary resource files used here. So - // the symbol name must not have a leading `_` on Apple platforms. It's not - // clear how this applies to other symbols, such as those which _have_ a leading - // underscore in them by design, many leading underscores, etc. -#if defined OS_DARWIN - std::string prefix = "binary_"; -#else - std::string prefix = "_binary_"; -#endif - std::string symbol_name_start = prefix + name_replaced + "_start"; - std::string symbol_name_end = prefix + name_replaced + "_end"; - - const char * sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); - const char * sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); - - if (sym_start && sym_end) - { - auto resource_size = static_cast(std::distance(sym_start, sym_end)); - return { sym_start, resource_size }; - } - return {}; -#endif } From 8cd9fc4a2d6936343c2be119d8fdd61986cdd77f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Mar 2023 21:20:20 +0100 Subject: [PATCH 004/151] Fix build --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 2cd0a011013..c7f31e13287 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -62,7 +62,7 @@ then ninja $NINJA_FLAGS clickhouse-keeper ls -la ./programs/ - ldd ./programs/clickhouse-keeper + ldd ./programs/clickhouse-keeper ||: if [ -n "$MAKE_DEB" ]; then # No quotes because I want it to expand to nothing if empty. From 03845ba9c5fb1ccb03330059369641751f1c9ab1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 00:16:23 +0200 Subject: [PATCH 005/151] Fix MSan. 
--- src/Common/SymbolIndex.cpp | 81 +++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index f1cace5017c..79f97e93a2f 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -9,7 +9,6 @@ #include -//#include #include #include @@ -63,9 +62,11 @@ Otherwise you will get only exported symbols from program headers. #endif #define __msan_unpoison_string(X) // NOLINT +#define __msan_unpoison(X, Y) // NOLINT #if defined(ch_has_feature) # if ch_has_feature(memory_sanitizer) # undef __msan_unpoison_string +# undef __msan_unpoison # include # endif #endif @@ -136,10 +137,12 @@ void collectSymbolsFromProgramHeaders( /* Iterate over all headers of the current shared lib * (first call is for the executable itself) */ + __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { /* Further processing is only needed if the dynamic section is reached */ + __msan_unpoison(&info->dlpi_phdr[header_index], sizeof(info->dlpi_phdr[header_index])); if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC) continue; @@ -160,44 +163,53 @@ void collectSymbolsFromProgramHeaders( */ size_t sym_cnt = 0; - for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it) { - ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); - - // TODO: this branch leads to invalid address of the hash table. Need further investigation. - // if (it->d_tag == DT_HASH) - // { - // const ElfW(Word) * hash = reinterpret_cast(base_address); - // sym_cnt = hash[1]; - // break; - // } - if (it->d_tag == DT_GNU_HASH) + const auto * it = dyn_begin; + while (true) { - /// This code based on Musl-libc. + __msan_unpoison(it, sizeof(*it)); + if (it->d_tag != DT_NULL) + break; - const uint32_t * buckets = nullptr; - const uint32_t * hashval = nullptr; + ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); - const ElfW(Word) * hash = reinterpret_cast(base_address); - - buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); - - for (ElfW(Word) i = 0; i < hash[0]; ++i) - if (buckets[i] > sym_cnt) - sym_cnt = buckets[i]; - - if (sym_cnt) + if (it->d_tag == DT_GNU_HASH) { - sym_cnt -= hash[1]; - hashval = buckets + hash[0] + sym_cnt; - do + /// This code based on Musl-libc. 
+ + const uint32_t * buckets = nullptr; + const uint32_t * hashval = nullptr; + + const ElfW(Word) * hash = reinterpret_cast(base_address); + + __msan_unpoison(&hash[0], sizeof(*hash)); + __msan_unpoison(&hash[1], sizeof(*hash)); + __msan_unpoison(&hash[2], sizeof(*hash)); + + buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); + + __msan_unpoison(buckets, hash[0] * sizeof(buckets[0])); + + for (ElfW(Word) i = 0; i < hash[0]; ++i) + if (buckets[i] > sym_cnt) + sym_cnt = buckets[i]; + + if (sym_cnt) { - ++sym_cnt; + sym_cnt -= hash[1]; + hashval = buckets + hash[0] + sym_cnt; + __msan_unpoison(&hashval, sizeof(hashval)); + do + { + ++sym_cnt; + } + while (!(*hashval++ & 1)); } - while (!(*hashval++ & 1)); + + break; } - break; + ++it; } } @@ -228,6 +240,8 @@ void collectSymbolsFromProgramHeaders( /* Get the pointer to the first entry of the symbol table */ const ElfW(Sym) * elf_sym = reinterpret_cast(base_address); + __msan_unpoison(elf_sym, sym_cnt * sizeof(*elf_sym)); + /* Iterate over the symbol table */ for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { @@ -235,6 +249,7 @@ void collectSymbolsFromProgramHeaders( * This is located at the address of st_name relative to the beginning of the string table. */ const char * sym_name = &strtab[elf_sym[sym_index].st_name]; + __msan_unpoison_string(sym_name); if (!sym_name) continue; @@ -264,13 +279,17 @@ void collectSymbolsFromProgramHeaders( #if !defined USE_MUSL String getBuildIDFromProgramHeaders(dl_phdr_info * info) { + __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { const ElfPhdr & phdr = info->dlpi_phdr[header_index]; + __msan_unpoison(&phdr, sizeof(phdr)); if (phdr.p_type != PT_NOTE) continue; - return Elf::getBuildID(reinterpret_cast(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz); + std::string_view view(reinterpret_cast(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz); + __msan_unpoison(view.data(), view.size()); + return Elf::getBuildID(view.data(), view.size()); } return {}; } From 9e513a147b5ca7ca0b75feec5488093f32df77d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 04:22:01 +0200 Subject: [PATCH 006/151] Fixup --- src/Common/SymbolIndex.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 79f97e93a2f..394ae1a0592 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -138,6 +138,7 @@ void collectSymbolsFromProgramHeaders( * (first call is for the executable itself) */ __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); + __msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { /* Further processing is only needed if the dynamic section is reached @@ -280,6 +281,7 @@ void collectSymbolsFromProgramHeaders( String getBuildIDFromProgramHeaders(dl_phdr_info * info) { __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); + __msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { const ElfPhdr & phdr = info->dlpi_phdr[header_index]; From 72dd039d1c9cedafea3f6aa6dbf8c47b24ab61c9 Mon Sep 17 00:00:00 2001 From: xbthink Date: Sun, 7 May 2023 16:22:05 +0800 Subject: [PATCH 007/151] add comments and functional test --- src/Dictionaries/DictionaryFactory.h | 3 ++ ...ayout_to_complex_by_complex_keys.reference | 4 +++ 
...nary_layout_to_complex_by_complex_keys.sql | 29 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference create mode 100644 tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql diff --git a/src/Dictionaries/DictionaryFactory.h b/src/Dictionaries/DictionaryFactory.h index 0dc80af62fc..309f14f6b7f 100644 --- a/src/Dictionaries/DictionaryFactory.h +++ b/src/Dictionaries/DictionaryFactory.h @@ -54,6 +54,9 @@ public: bool created_from_ddl)>; bool isComplex(const std::string & layout_type) const; + + /// if the argument `layout_type` is not complex layout and has corresponding complex layout, + /// change `layout_type` to corresponding complex and return true; otherwise do nothing and return false. bool convertToComplex(std::string & layout_type) const; void registerLayout(const std::string & layout_type, LayoutCreateFunction create_layout, bool is_layout_complex, bool has_layout_complex = true); diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference new file mode 100644 index 00000000000..7f38556e7d1 --- /dev/null +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference @@ -0,0 +1,4 @@ +dict_flat_simple Flat +dict_hashed_simple Hashed +dict_hashed_complex ComplexKeyHashed +dict_hashed_simple_auto_convert ComplexKeyHashed diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql new file mode 100644 index 00000000000..98c7c5c05f9 --- /dev/null +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql @@ -0,0 +1,29 @@ +DROP DICTIONARY IF EXISTS dict_flat_simple; +DROP DICTIONARY IF EXISTS dict_hashed_simple; +DROP DICTIONARY IF EXISTS dict_hashed_complex; +DROP DICTIONARY IF EXISTS dict_hashed_simple_auto_convert; +DROP TABLE IF EXISTS dict_data; + +CREATE TABLE dict_data (v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355 FROM numbers(100); + +CREATE DICTIONARY dict_flat_simple (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(flat()); +SYSTEM RELOAD DICTIONARY dict_flat_simple; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_flat_simple'; +DROP DICTIONARY dict_flat_simple; + +CREATE DICTIONARY dict_hashed_simple (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple'; +DROP DICTIONARY dict_hashed_simple; + +CREATE DICTIONARY dict_hashed_complex (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0,v1 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(complex_key_hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_complex; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_complex'; +DROP DICTIONARY dict_hashed_complex; + +CREATE DICTIONARY dict_hashed_simple_auto_convert (v0 UInt16, v1 UInt16, 
v2 UInt16) PRIMARY KEY v0,v1 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_auto_convert; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_auto_convert'; +DROP DICTIONARY dict_hashed_simple_auto_convert; + +DROP TABLE dict_data; From 7f9b21849c4ac6dc9243426ae885e3e6b7208d6f Mon Sep 17 00:00:00 2001 From: xiebin Date: Sun, 7 May 2023 19:06:06 +0800 Subject: [PATCH 008/151] Fixed a lowercase initial letter and removed needless data --- src/Dictionaries/DictionaryFactory.h | 2 +- ...uto_convert_dictionary_layout_to_complex_by_complex_keys.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/DictionaryFactory.h b/src/Dictionaries/DictionaryFactory.h index 309f14f6b7f..35097a5ed24 100644 --- a/src/Dictionaries/DictionaryFactory.h +++ b/src/Dictionaries/DictionaryFactory.h @@ -55,7 +55,7 @@ public: bool isComplex(const std::string & layout_type) const; - /// if the argument `layout_type` is not complex layout and has corresponding complex layout, + /// If the argument `layout_type` is not complex layout and has corresponding complex layout, /// change `layout_type` to corresponding complex and return true; otherwise do nothing and return false. bool convertToComplex(std::string & layout_type) const; diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql index 98c7c5c05f9..0fb06e5acc2 100644 --- a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql @@ -4,7 +4,7 @@ DROP DICTIONARY IF EXISTS dict_hashed_complex; DROP DICTIONARY IF EXISTS dict_hashed_simple_auto_convert; DROP TABLE IF EXISTS dict_data; -CREATE TABLE dict_data (v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355 FROM numbers(100); +CREATE TABLE dict_data (v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory(); CREATE DICTIONARY dict_flat_simple (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(flat()); SYSTEM RELOAD DICTIONARY dict_flat_simple; From f0bfdb6b029748a486e5f683171f135d6a5dd957 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 22 Feb 2023 16:24:02 +0000 Subject: [PATCH 009/151] Refactor Query Tree visitor --- src/Analyzer/InDepthQueryTreeVisitor.h | 65 +++++++++++++++++++++++ src/Analyzer/Passes/CountDistinctPass.cpp | 34 ++++++++---- 2 files changed, 88 insertions(+), 11 deletions(-) diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index 1cc48fb1e53..ee321842ffa 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -235,4 +235,69 @@ public: template using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor; +template +class QueryTreeVisitor +{ +public: + explicit QueryTreeVisitor(ContextPtr context_) + : current_context(std::move(context_)) + {} + + bool needApply(QueryTreeNodePtr & node) + { + return getImpl().needApply(node); + } + + void visit(QueryTreeNodePtr & node) + { + auto current_scope_context_ptr = current_context; + SCOPE_EXIT( + current_context = std::move(current_scope_context_ptr); 
+ ); + + if (auto * query_node = node->template as()) + current_context = query_node->getContext(); + else if (auto * union_node = node->template as()) + current_context = union_node->getContext(); + + if (!TOP_TO_BOTTOM) + visitChildren(node); + + if (needApply(node)) + getImpl().apply(node); + + if (TOP_TO_BOTTOM) + visitChildren(node); + } + + const ContextPtr & getContext() const + { + return current_context; + } + + const Settings & getSettings() const + { + return current_context->getSettingsRef(); + } +private: + + Impl & getImpl() + { + return *static_cast(this); + } + + void visitChildren(QueryTreeNodePtr & node) + { + for (auto & child : node->getChildren()) + { + if (child) + visit(child); + } + } + + static constexpr bool TOP_TO_BOTTOM = Impl::TOP_TO_BOTTOM; + + ContextPtr current_context; +}; + } diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 945295f5cbc..38f7d07d052 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -16,16 +16,17 @@ namespace DB namespace { -class CountDistinctVisitor : public InDepthQueryTreeVisitorWithContext +class CountDistinctVisitor : public QueryTreeVisitor { public: - using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; + using QueryTreeVisitor::QueryTreeVisitor; - void visitImpl(QueryTreeNodePtr & node) + static constexpr bool TOP_TO_BOTTOM = true; + + bool needApply(QueryTreeNodePtr & node) { if (!getSettings().count_distinct_optimization) - return; + return false; auto * query_node = node->as(); @@ -33,32 +34,43 @@ public: if (!query_node || (query_node->hasWith() || query_node->hasPrewhere() || query_node->hasWhere() || query_node->hasGroupBy() || query_node->hasHaving() || query_node->hasWindow() || query_node->hasOrderBy() || query_node->hasLimitByLimit() || query_node->hasLimitByOffset() || query_node->hasLimitBy() || query_node->hasLimit() || query_node->hasOffset())) - return; + return false; /// Check that query has only single table expression auto join_tree_node_type = query_node->getJoinTree()->getNodeType(); if (join_tree_node_type == QueryTreeNodeType::JOIN || join_tree_node_type == QueryTreeNodeType::ARRAY_JOIN) - return; + return false; /// Check that query has only single node in projection auto & projection_nodes = query_node->getProjection().getNodes(); if (projection_nodes.size() != 1) - return; + return false; /// Check that query single projection node is `countDistinct` function auto & projection_node = projection_nodes[0]; auto * function_node = projection_node->as(); if (!function_node) - return; + return false; auto lower_function_name = Poco::toLower(function_node->getFunctionName()); if (lower_function_name != "countdistinct" && lower_function_name != "uniqexact") - return; + return false; /// Check that `countDistinct` function has single COLUMN argument auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); if (count_distinct_arguments_nodes.size() != 1 && count_distinct_arguments_nodes[0]->getNodeType() != QueryTreeNodeType::COLUMN) - return; + return false; + + return true; + } + + void apply(QueryTreeNodePtr & node) + { + auto * query_node = node->as(); + auto & projection_nodes = query_node->getProjection().getNodes(); + auto * function_node = projection_nodes[0]->as(); + + auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); auto & count_distinct_argument_column = count_distinct_arguments_nodes[0]; auto & 
count_distinct_argument_column_typed = count_distinct_argument_column->as(); From 5c34ee3019199a7e1d24730684c9c84e6c8e0615 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 9 May 2023 15:14:49 +0000 Subject: [PATCH 010/151] Skip unresolved table function arguments --- src/Analyzer/InDepthQueryTreeVisitor.h | 15 ++++++++++++++- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- src/Analyzer/TableFunctionNode.cpp | 3 ++- src/Analyzer/TableFunctionNode.h | 8 +++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index ee321842ffa..be3a760d4e6 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -248,6 +249,16 @@ public: return getImpl().needApply(node); } + bool shouldSkipSubtree(QueryTreeNodePtr & parent, size_t subtree_index) + { + if (auto * table_function_node = parent->as()) + { + const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes(); + return std::find(unresolved_indexes.begin(), unresolved_indexes.end(), subtree_index) != unresolved_indexes.end(); + } + return false; + } + void visit(QueryTreeNodePtr & node) { auto current_scope_context_ptr = current_context; @@ -288,10 +299,12 @@ private: void visitChildren(QueryTreeNodePtr & node) { + size_t index = 0; for (auto & child : node->getChildren()) { - if (child) + if (child && !shouldSkipSubtree(node, index)) visit(child); + ++index; } } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 7ab0261850b..aaea81dcada 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6356,7 +6356,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_function_ptr->parseArguments(table_function_ast, scope_context); auto table_function_storage = table_function_ptr->execute(table_function_ast, scope_context, table_function_ptr->getName()); - table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context); + table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context, std::move(skip_analysis_arguments_indexes)); } /// Resolve array join node in scope diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp index c130503d660..30644ad4ec4 100644 --- a/src/Analyzer/TableFunctionNode.cpp +++ b/src/Analyzer/TableFunctionNode.cpp @@ -27,12 +27,13 @@ TableFunctionNode::TableFunctionNode(String table_function_name_) children[arguments_child_index] = std::make_shared(); } -void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context) +void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector unresolved_arguments_indexes_) { table_function = std::move(table_function_value); storage = std::move(storage_value); storage_id = storage->getStorageID(); storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); + unresolved_arguments_indexes = std::move(unresolved_arguments_indexes_); } const StorageID & TableFunctionNode::getStorageID() const diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h index 7786ba62205..69237ac8416 100644 --- a/src/Analyzer/TableFunctionNode.h +++ 
b/src/Analyzer/TableFunctionNode.h @@ -98,7 +98,7 @@ public: } /// Resolve table function with table function, storage and context - void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context); + void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector unresolved_arguments_indexes_); /// Get storage id, throws exception if function node is not resolved const StorageID & getStorageID() const; @@ -106,6 +106,11 @@ public: /// Get storage snapshot, throws exception if function node is not resolved const StorageSnapshotPtr & getStorageSnapshot() const; + const std::vector & getUnresolvedArgumentIndexes() const + { + return unresolved_arguments_indexes; + } + /// Return true if table function node has table expression modifiers, false otherwise bool hasTableExpressionModifiers() const { @@ -164,6 +169,7 @@ private: StoragePtr storage; StorageID storage_id; StorageSnapshotPtr storage_snapshot; + std::vector unresolved_arguments_indexes; std::optional table_expression_modifiers; SettingsChanges settings_changes; From f9eb6ca6fd767666b7b2c8e88e92e8d02639ef99 Mon Sep 17 00:00:00 2001 From: xiebin Date: Wed, 10 May 2023 23:43:40 +0800 Subject: [PATCH 011/151] if the data type of numeric key is not native uint, convert to complex. --- .../getDictionaryConfigurationFromAST.cpp | 16 ++++++++++------ ...y_layout_to_complex_by_complex_keys.reference | 1 + ...tionary_layout_to_complex_by_complex_keys.sql | 5 +++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 59f1a712d00..c179ce84ff9 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB @@ -609,17 +610,20 @@ getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr conte bool complex = DictionaryFactory::instance().isComplex(dictionary_layout->layout_type); - if (pk_attrs.size() > 1 && !complex - && DictionaryFactory::instance().convertToComplex(dictionary_layout->layout_type)) - { - complex = true; - } - auto all_attr_names_and_types = buildDictionaryAttributesConfiguration( xml_document, structure_element, query.dictionary_attributes_list, pk_attrs); checkPrimaryKey(all_attr_names_and_types, pk_attrs); + /// If the pk size is 1 and pk's DataType is not native uint(UInt8~UInt64), we should convert to complex, + /// because the data type of Numeric key(simple layout) is UInt64. 
+ if ((pk_attrs.size() > 1 || (pk_attrs.size() == 1 && !WhichDataType(DataTypeFactory::instance().get(all_attr_names_and_types.find(pk_attrs[0])->second.type)).isNativeUInt())) + && !complex + && DictionaryFactory::instance().convertToComplex(dictionary_layout->layout_type)) + { + complex = true; + } + buildPrimaryKeyConfiguration(xml_document, structure_element, complex, pk_attrs, query.dictionary_attributes_list); buildLayoutConfiguration(xml_document, current_dictionary, query.dictionary->dict_settings, dictionary_layout); diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference index 7f38556e7d1..cc9381622ec 100644 --- a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference @@ -2,3 +2,4 @@ dict_flat_simple Flat dict_hashed_simple Hashed dict_hashed_complex ComplexKeyHashed dict_hashed_simple_auto_convert ComplexKeyHashed +dict_hashed_simple_int_auto_convert ComplexKeyHashed diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql index 0fb06e5acc2..77d933afa9f 100644 --- a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql @@ -26,4 +26,9 @@ SYSTEM RELOAD DICTIONARY dict_hashed_simple_auto_convert; SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_auto_convert'; DROP DICTIONARY dict_hashed_simple_auto_convert; +CREATE DICTIONARY dict_hashed_simple_int_auto_convert (v0 Int16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_int_auto_convert; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_int_auto_convert'; +DROP DICTIONARY dict_hashed_simple_int_auto_convert; + DROP TABLE dict_data; From beb3690c7ecf95f496f4cea2be7d3b19ae24046c Mon Sep 17 00:00:00 2001 From: xiebin Date: Tue, 30 May 2023 16:09:01 +0800 Subject: [PATCH 012/151] if dictionary id is number, do not convert layout to complex --- src/Dictionaries/getDictionaryConfigurationFromAST.cpp | 7 ++++--- .../queries/0_stateless/02391_hashed_dictionary_shards.sql | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index c179ce84ff9..5f8e0bc27b4 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -615,9 +615,10 @@ getDictionaryConfigurationFromAST(const ASTCreateQuery & query, ContextPtr conte checkPrimaryKey(all_attr_names_and_types, pk_attrs); - /// If the pk size is 1 and pk's DataType is not native uint(UInt8~UInt64), we should convert to complex, - /// because the data type of Numeric key(simple layout) is UInt64. 
- if ((pk_attrs.size() > 1 || (pk_attrs.size() == 1 && !WhichDataType(DataTypeFactory::instance().get(all_attr_names_and_types.find(pk_attrs[0])->second.type)).isNativeUInt())) + /// If the pk size is 1 and pk's DataType is not number, we should convert to complex. + /// NOTE: the data type of Numeric key(simple layout) is UInt64, so if the type is not under UInt64, type casting will lead to precision loss. + DataTypePtr first_key_type = DataTypeFactory::instance().get(all_attr_names_and_types.find(pk_attrs[0])->second.type); + if ((pk_attrs.size() > 1 || (pk_attrs.size() == 1 && !isNumber(first_key_type))) && !complex && DictionaryFactory::instance().convertToComplex(dictionary_layout->layout_type)) { diff --git a/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql b/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql index ac43c12afc0..018f6b2cf4f 100644 --- a/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql +++ b/tests/queries/0_stateless/02391_hashed_dictionary_shards.sql @@ -89,7 +89,7 @@ SOURCE(CLICKHOUSE(TABLE test_table_string)) LAYOUT(SPARSE_HASHED(SHARDS 10)) LIFETIME(0); -SYSTEM RELOAD DICTIONARY test_dictionary_10_shards_string; -- { serverError CANNOT_PARSE_TEXT } +SYSTEM RELOAD DICTIONARY test_dictionary_10_shards_string; DROP DICTIONARY test_dictionary_10_shards_string; From b66881b95a3698c050ef0ff9bff2f570fa01fba8 Mon Sep 17 00:00:00 2001 From: xiebin Date: Tue, 30 May 2023 17:59:46 +0800 Subject: [PATCH 013/151] refine functional test --- ...ayout_to_complex_by_complex_keys.reference | 6 ++-- ...nary_layout_to_complex_by_complex_keys.sql | 35 ++++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference index cc9381622ec..7616c59e4fd 100644 --- a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.reference @@ -1,5 +1,5 @@ dict_flat_simple Flat -dict_hashed_simple Hashed -dict_hashed_complex ComplexKeyHashed +dict_hashed_simple_Decimal128 Hashed +dict_hashed_simple_Float32 Hashed +dict_hashed_simple_String ComplexKeyHashed dict_hashed_simple_auto_convert ComplexKeyHashed -dict_hashed_simple_int_auto_convert ComplexKeyHashed diff --git a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql index 77d933afa9f..753b9f663b5 100644 --- a/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql +++ b/tests/queries/0_stateless/02731_auto_convert_dictionary_layout_to_complex_by_complex_keys.sql @@ -1,34 +1,35 @@ DROP DICTIONARY IF EXISTS dict_flat_simple; -DROP DICTIONARY IF EXISTS dict_hashed_simple; -DROP DICTIONARY IF EXISTS dict_hashed_complex; +DROP DICTIONARY IF EXISTS dict_hashed_simple_Decimal128; +DROP DICTIONARY IF EXISTS dict_hashed_simple_Float32; +DROP DICTIONARY IF EXISTS dict_hashed_simple_String; DROP DICTIONARY IF EXISTS dict_hashed_simple_auto_convert; DROP TABLE IF EXISTS dict_data; -CREATE TABLE dict_data (v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory(); +CREATE TABLE dict_data (v0 UInt16, v1 Int16, v2 Float32, v3 Decimal128(10), v4 String) engine=Memory() AS SELECT number, 
number%65535, number*1.1, number*1.1, 'foo' FROM numbers(10);; CREATE DICTIONARY dict_flat_simple (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(flat()); SYSTEM RELOAD DICTIONARY dict_flat_simple; SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_flat_simple'; DROP DICTIONARY dict_flat_simple; -CREATE DICTIONARY dict_hashed_simple (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); -SYSTEM RELOAD DICTIONARY dict_hashed_simple; -SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple'; -DROP DICTIONARY dict_hashed_simple; +CREATE DICTIONARY dict_hashed_simple_Decimal128 (v3 Decimal128(10), v1 UInt16, v2 Float32) PRIMARY KEY v3 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_Decimal128; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_Decimal128'; +DROP DICTIONARY dict_hashed_simple_Decimal128; -CREATE DICTIONARY dict_hashed_complex (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0,v1 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(complex_key_hashed()); -SYSTEM RELOAD DICTIONARY dict_hashed_complex; -SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_complex'; -DROP DICTIONARY dict_hashed_complex; +CREATE DICTIONARY dict_hashed_simple_Float32 (v2 Float32, v3 Decimal128(10), v4 String) PRIMARY KEY v2 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_Float32; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_Float32'; +DROP DICTIONARY dict_hashed_simple_Float32; -CREATE DICTIONARY dict_hashed_simple_auto_convert (v0 UInt16, v1 UInt16, v2 UInt16) PRIMARY KEY v0,v1 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +CREATE DICTIONARY dict_hashed_simple_String (v4 String, v3 Decimal128(10), v2 Float32) PRIMARY KEY v4 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); +SYSTEM RELOAD DICTIONARY dict_hashed_simple_String; +SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_String'; +DROP DICTIONARY dict_hashed_simple_String; + +CREATE DICTIONARY dict_hashed_simple_auto_convert (v0 UInt16, v1 Int16, v2 UInt16) PRIMARY KEY v0,v1 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); SYSTEM RELOAD DICTIONARY dict_hashed_simple_auto_convert; SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_auto_convert'; DROP DICTIONARY dict_hashed_simple_auto_convert; -CREATE DICTIONARY dict_hashed_simple_int_auto_convert (v0 Int16, v1 UInt16, v2 UInt16) PRIMARY KEY v0 SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(0) LAYOUT(hashed()); -SYSTEM RELOAD DICTIONARY dict_hashed_simple_int_auto_convert; -SELECT name, type FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_hashed_simple_int_auto_convert'; -DROP DICTIONARY dict_hashed_simple_int_auto_convert; - DROP TABLE dict_data; From 8b1cd9fcec4408933d537bd1b74a382884d7b52f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 02:27:10 +0200 Subject: [PATCH 014/151] Remove metadata_cache from config --- programs/server/config.xml | 6 ------ 1 file 
changed, 6 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index d18b4cb2ac9..f52241ff44d 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1550,12 +1550,6 @@ - - - + --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml index 5f7769f7403..830c75f7378 100644 --- a/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml @@ -1,3 +1,7 @@ - 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + diff --git a/tests/integration/test_config_decryption/configs/config_subnodes.xml b/tests/integration/test_config_decryption/configs/config_subnodes.xml index b0e519ff546..8213270f747 100644 --- a/tests/integration/test_config_decryption/configs/config_subnodes.xml +++ b/tests/integration/test_config_decryption/configs/config_subnodes.xml @@ -1,10 +1,14 @@ + 00112233445566778899aabbccddeeff - + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + diff --git a/tests/integration/test_config_decryption/configs/config_wrong_method.xml b/tests/integration/test_config_decryption/configs/config_wrong_method.xml index b452ce6374c..b96c13d5105 100644 --- a/tests/integration/test_config_decryption/configs/config_wrong_method.xml +++ b/tests/integration/test_config_decryption/configs/config_wrong_method.xml @@ -1,4 +1,5 @@ + 00112233445566778899aabbccddeeff @@ -7,6 +8,8 @@ 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff - 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index b148f9a051a..c6987d12324 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -15,7 +15,7 @@ def start_clickhouse(config, err_msg): def test_wrong_method(): start_clickhouse( - "configs/config_wrong_method.xml", "Wrong encryption method. Got WRONG" + "configs/config_wrong_method.xml", "Unknown encryption method. 
Got WRONG" ) From 7d430b803784084a09fd71f98f1a7e8872feb307 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 25 Jul 2023 23:38:03 +0400 Subject: [PATCH 056/151] retry ConnectionResetException --- src/IO/S3/Client.cpp | 79 +++++++++++-------- src/IO/S3/Client.h | 2 +- .../test_checking_s3_blobs_paranoid/test.py | 22 ++++-- 3 files changed, 65 insertions(+), 38 deletions(-) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 94a7b5166da..51c7ee32579 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -357,12 +357,14 @@ Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) c Model::ListObjectsV2Outcome Client::ListObjectsV2(const ListObjectsV2Request & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::ListObjectsV2Request & req) { return ListObjectsV2(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::ListObjectsV2Request & req) { return ListObjectsV2(req); }); } Model::ListObjectsOutcome Client::ListObjects(const ListObjectsRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::ListObjectsRequest & req) { return ListObjects(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::ListObjectsRequest & req) { return ListObjects(req); }); } Model::GetObjectOutcome Client::GetObject(const GetObjectRequest & request) const @@ -372,19 +374,19 @@ Model::GetObjectOutcome Client::GetObject(const GetObjectRequest & request) cons Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(const AbortMultipartUploadRequest & request) const { - return doRequestWithRetryNetworkErrors( + return doRequestWithRetryNetworkErrors( request, [this](const Model::AbortMultipartUploadRequest & req) { return AbortMultipartUpload(req); }); } Model::CreateMultipartUploadOutcome Client::CreateMultipartUpload(const CreateMultipartUploadRequest & request) const { - return doRequestWithRetryNetworkErrors( + return doRequestWithRetryNetworkErrors( request, [this](const Model::CreateMultipartUploadRequest & req) { return CreateMultipartUpload(req); }); } Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const CompleteMultipartUploadRequest & request) const { - auto outcome = doRequestWithRetryNetworkErrors( + auto outcome = doRequestWithRetryNetworkErrors( request, [this](const Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); }); if (!outcome.IsSuccess() || provider_type != ProviderType::GCS) @@ -414,32 +416,38 @@ Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(const Comp Model::CopyObjectOutcome Client::CopyObject(const CopyObjectRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::CopyObjectRequest & req) { return CopyObject(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::CopyObjectRequest & req) { return CopyObject(req); }); } Model::PutObjectOutcome Client::PutObject(const PutObjectRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::PutObjectRequest & req) { return PutObject(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::PutObjectRequest & req) { return PutObject(req); }); } Model::UploadPartOutcome Client::UploadPart(const UploadPartRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::UploadPartRequest & req) { return UploadPart(req); }); + return 
doRequestWithRetryNetworkErrors( + request, [this](const Model::UploadPartRequest & req) { return UploadPart(req); }); } Model::UploadPartCopyOutcome Client::UploadPartCopy(const UploadPartCopyRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); }); } Model::DeleteObjectOutcome Client::DeleteObject(const DeleteObjectRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::DeleteObjectRequest & req) { return DeleteObject(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::DeleteObjectRequest & req) { return DeleteObject(req); }); } Model::DeleteObjectsOutcome Client::DeleteObjects(const DeleteObjectsRequest & request) const { - return doRequestWithRetryNetworkErrors(request, [this](const Model::DeleteObjectsRequest & req) { return DeleteObjects(req); }); + return doRequestWithRetryNetworkErrors( + request, [this](const Model::DeleteObjectsRequest & req) { return DeleteObjects(req); }); } Client::ComposeObjectOutcome Client::ComposeObject(const ComposeObjectRequest & request) const @@ -468,7 +476,8 @@ Client::ComposeObjectOutcome Client::ComposeObject(const ComposeObjectRequest & return ComposeObjectOutcome(MakeRequest(req, endpointResolutionOutcome.GetResult(), Aws::Http::HttpMethod::HTTP_PUT)); }; - return doRequestWithRetryNetworkErrors(request, request_fn); + return doRequestWithRetryNetworkErrors( + request, request_fn); } template @@ -549,52 +558,60 @@ Client::doRequest(const RequestType & request, RequestFn request_fn) const throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects"); } -template +template std::invoke_result_t Client::doRequestWithRetryNetworkErrors(const RequestType & request, RequestFn request_fn) const { auto with_retries = [this, request_fn_ = std::move(request_fn)] (const RequestType & request_) { - const size_t max_tries = 10; + chassert(client_configuration.retryStrategy); + const Int64 max_attempts = client_configuration.retryStrategy->GetMaxAttempts(); std::exception_ptr last_exception = nullptr; - for (size_t try_no = 0; try_no < max_tries; ++try_no) + for (Int64 attempt_no = 0; attempt_no < max_attempts; ++attempt_no) { try { + /// S3 does retries network errors actually. + /// But it is matter when errors occur. + /// This code retries a specific case when + /// network error happens when XML document is being read from the response body. + /// Hence, the response body is a stream, network errors are possible at reading. + /// S3 doesn't retry them. + + /// Not all requests can be retried in that way. + /// Requests that read out response body to build the result are possible to retry. + /// Requests that expose the response stream as an answer are not retried with that code. E.g. GetObject. 
return request_fn_(request_); } catch (Poco::Net::ConnectionResetException &) { - // to do distinguish read/write - if (client_configuration.for_disk_s3) + + if constexpr (IsReadMethod) { - ProfileEvents::increment(ProfileEvents::DiskS3WriteRequestsErrors); - ProfileEvents::increment(ProfileEvents::DiskS3ReadRequestsErrors); + if (client_configuration.for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3ReadRequestsErrors); + else + ProfileEvents::increment(ProfileEvents::S3ReadRequestsErrors); } else { - ProfileEvents::increment(ProfileEvents::S3WriteRequestsErrors); - ProfileEvents::increment(ProfileEvents::S3ReadRequestsErrors); + if (client_configuration.for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3WriteRequestsErrors); + else + ProfileEvents::increment(ProfileEvents::S3WriteRequestsErrors); } tryLogCurrentException(log, "Will retry"); - // to do back off last_exception = std::current_exception(); + + auto error = Aws::Client::AWSError(Aws::Client::CoreErrors::NETWORK_CONNECTION, /*retry*/ true); + client_configuration.retryStrategy->CalculateDelayBeforeNextRetry(error, attempt_no); continue; } } chassert(last_exception); std::rethrow_exception(last_exception); - -// try -// { -// std::rethrow_exception(last_exception); -// } -// catch (const Poco::Exception & e) -// { -// throw Exception(Exception::CreateFromPocoTag{}, e); -// } }; return doRequest(request, with_retries); diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index bb893c21774..1b0fdcefe32 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -250,7 +250,7 @@ private: std::invoke_result_t doRequest(const RequestType & request, RequestFn request_fn) const; - template + template std::invoke_result_t doRequestWithRetryNetworkErrors(const RequestType & request, RequestFn request_fn) const; diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 6aa149b9c7b..28b0c9beeaa 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -248,7 +248,9 @@ def test_when_s3_connection_reset_by_peer_at_upload_is_retried( action_args=["1"] if send_something else ["0"], ) - insert_query_id = f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD" + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}" + ) node.query( f""" INSERT INTO @@ -283,7 +285,9 @@ def test_when_s3_connection_reset_by_peer_at_upload_is_retried( action="connection_reset_by_peer", action_args=["1"] if send_something else ["0"], ) - insert_query_id = f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_1" + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}_1" + ) error = node.query_and_get_error( f""" INSERT INTO @@ -307,7 +311,8 @@ def test_when_s3_connection_reset_by_peer_at_upload_is_retried( assert "Code: 1000" in error, error assert ( "DB::Exception: Connection reset by peer." in error - or "DB::Exception: Poco::Exception. Code: 1000, e.code() = 104, Connection reset by peer" in error + or "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 104, Connection reset by peer" + in error ), error @@ -325,7 +330,9 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( action_args=["1"] if send_something else ["0"], ) - insert_query_id = f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD" + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}" + ) node.query( f""" INSERT INTO @@ -361,7 +368,9 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( action_args=["1"] if send_something else ["0"], ) - insert_query_id = f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_1" + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}_1" + ) error = node.query_and_get_error( f""" INSERT INTO @@ -385,5 +394,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( assert "Code: 1000" in error, error assert ( "DB::Exception: Connection reset by peer." in error - or "DB::Exception: Poco::Exception. Code: 1000, e.code() = 104, Connection reset by peer" in error + or "DB::Exception: Poco::Exception. Code: 1000, e.code() = 104, Connection reset by peer" + in error ), error From 9ea479a1a1b77b658f4d8504cb140c09121d44c8 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 26 Jul 2023 13:37:33 +0000 Subject: [PATCH 057/151] Analyzer: Support ARRAY JOIN COLUMNS(...) syntax --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 106 ++++++++++-------- .../02833_array_join_columns.reference | 0 .../0_stateless/02833_array_join_columns.sql | 19 ++++ 3 files changed, 78 insertions(+), 47 deletions(-) create mode 100644 tests/queries/0_stateless/02833_array_join_columns.reference create mode 100644 tests/queries/0_stateless/02833_array_join_columns.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index abf02547ccd..5dbe9e350bf 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6477,55 +6477,69 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - auto result_type = array_join_expression->getResultType(); - bool is_array_type = isArray(result_type); - bool is_map_type = isMap(result_type); - - if (!is_array_type && !is_map_type) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "ARRAY JOIN {} requires expression {} with Array or Map type. Actual {}. In scope {}", - array_join_node_typed.formatASTForErrorMessage(), - array_join_expression->formatASTForErrorMessage(), - result_type->getName(), - scope.scope_node->formatASTForErrorMessage()); - - if (is_map_type) - result_type = assert_cast(*result_type).getNestedType(); - - result_type = assert_cast(*result_type).getNestedType(); - - String array_join_column_name; - - if (!array_join_expression_alias.empty()) + auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { - array_join_column_name = array_join_expression_alias; - } - else if (auto * array_join_expression_inner_column = array_join_expression->as()) + auto result_type = expression->getResultType(); + bool is_array_type = isArray(result_type); + bool is_map_type = isMap(result_type); + + if (!is_array_type && !is_map_type) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "ARRAY JOIN {} requires expression {} with Array or Map type. Actual {}. 
In scope {}", + array_join_node_typed.formatASTForErrorMessage(), + expression->formatASTForErrorMessage(), + result_type->getName(), + scope.scope_node->formatASTForErrorMessage()); + + if (is_map_type) + result_type = assert_cast(*result_type).getNestedType(); + + result_type = assert_cast(*result_type).getNestedType(); + + String array_join_column_name; + + if (!array_join_expression_alias.empty()) + { + array_join_column_name = array_join_expression_alias; + } + else if (auto * array_join_expression_inner_column = array_join_expression->as()) + { + array_join_column_name = array_join_expression_inner_column->getColumnName(); + } + else if (!identifier_full_name.empty()) + { + array_join_column_name = identifier_full_name; + } + else + { + array_join_column_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter); + ++array_join_expressions_counter; + } + + if (array_join_column_names.contains(array_join_column_name)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ARRAY JOIN {} multiple columns with name {}. In scope {}", + array_join_node_typed.formatASTForErrorMessage(), + array_join_column_name, + scope.scope_node->formatASTForErrorMessage()); + array_join_column_names.emplace(array_join_column_name); + + NameAndTypePair array_join_column(array_join_column_name, result_type); + auto array_join_column_node = std::make_shared(std::move(array_join_column), expression, array_join_node); + array_join_column_node->setAlias(array_join_expression_alias); + array_join_column_expressions.push_back(std::move(array_join_column_node)); + }; + + // Support ARRAY JOIN COLUMNS(...). COLUMNS trasformer is resolved to list of columns. + if (auto * columns_list = array_join_expression->as()) { - array_join_column_name = array_join_expression_inner_column->getColumnName(); - } - else if (!identifier_full_name.empty()) - { - array_join_column_name = identifier_full_name; + for (auto & array_join_subexpression : columns_list->getNodes()) + process_array_join_expression(array_join_subexpression); } else { - array_join_column_name = "__array_join_expression_" + std::to_string(array_join_expressions_counter); - ++array_join_expressions_counter; + process_array_join_expression(array_join_expression); } - - if (array_join_column_names.contains(array_join_column_name)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "ARRAY JOIN {} multiple columns with name {}. In scope {}", - array_join_node_typed.formatASTForErrorMessage(), - array_join_column_name, - scope.scope_node->formatASTForErrorMessage()); - array_join_column_names.emplace(array_join_column_name); - - NameAndTypePair array_join_column(array_join_column_name, result_type); - auto array_join_column_node = std::make_shared(std::move(array_join_column), array_join_expression, array_join_node); - array_join_column_node->setAlias(array_join_expression_alias); - array_join_column_expressions.push_back(std::move(array_join_column_node)); } /** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because @@ -6537,11 +6551,9 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif * And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression * with type after ARRAY JOIN. 
*/ - for (size_t i = 0; i < array_join_nodes_size; ++i) + array_join_nodes = std::move(array_join_column_expressions); + for (auto & array_join_column_expression : array_join_nodes) { - auto & array_join_column_expression = array_join_nodes[i]; - array_join_column_expression = std::move(array_join_column_expressions[i]); - auto it = scope.alias_name_to_expression_node.find(array_join_column_expression->getAlias()); if (it != scope.alias_name_to_expression_node.end()) { diff --git a/tests/queries/0_stateless/02833_array_join_columns.reference b/tests/queries/0_stateless/02833_array_join_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02833_array_join_columns.sql b/tests/queries/0_stateless/02833_array_join_columns.sql new file mode 100644 index 00000000000..3f9a33a3959 --- /dev/null +++ b/tests/queries/0_stateless/02833_array_join_columns.sql @@ -0,0 +1,19 @@ +drop table if exists test_array_joins; +drop table if exists v4test_array_joins; + +create table test_array_joins +( + id UInt64 default rowNumberInAllBlocks() + 1, + arr_1 Array(String), + arr_2 Array(String), + arr_3 Array(String), + arr_4 Array(String) +) engine = MergeTree order by id; + +insert into test_array_joins (id,arr_1, arr_2, arr_3, arr_4) +SELECT number,array(randomPrintableASCII(3)),array(randomPrintableASCII(3)),array(randomPrintableASCII(3)),array(randomPrintableASCII(3)) +from numbers(1000); + +create view v4test_array_joins as SELECT * from test_array_joins where id != 10; + +select * from v4test_array_joins array join columns('^arr') where match(arr_4,'a') and id < 100 order by id format Null settings optimize_read_in_order = 0; From dccbe875d247818a17e999ceab5e062537169f80 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 26 Jul 2023 15:37:58 +0200 Subject: [PATCH 058/151] check if storage shutdown before we operate MergeTreeDeduplicationLog --- .../MergeTree/MergeTreeDeduplicationLog.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 09a04f13fc7..53481ab06a0 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -231,6 +233,12 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: return std::make_pair(info, false); } + if (stopped) + { + LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we add this part."); + return {}; + } + chassert(current_writer != nullptr); /// Create new record @@ -261,6 +269,12 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf if (deduplication_window == 0) return; + if (stopped) + { + LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we drop this part."); + return; + } + chassert(current_writer != nullptr); for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */) From 338188ae7f1ccdb399671cbfae584ff79705097a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jul 2023 14:10:27 +0000 Subject: [PATCH 059/151] fix test --- tests/queries/0_stateless/02791_remote_paths_refcount.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.sql b/tests/queries/0_stateless/02791_remote_paths_refcount.sql index e64df599d32..180601738ad 
100644 --- a/tests/queries/0_stateless/02791_remote_paths_refcount.sql +++ b/tests/queries/0_stateless/02791_remote_paths_refcount.sql @@ -2,6 +2,10 @@ DROP TABLE IF EXISTS t_refcount SYNC; +-- Names of parts (on which this test depends) +-- can differ in case of fault injection. +SET insert_keeper_fault_injection_probability = 0.0; + CREATE TABLE t_refcount (id UInt64, v UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t_refcount', '1') ORDER BY id PARTITION BY id % 2 From 89f2e8cdea8d7f32be735cd86326d1cbed24e158 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Wed, 26 Jul 2023 23:02:57 +0800 Subject: [PATCH 060/151] Fix S3 table function does not work for pre-signed URL --- src/TableFunctions/TableFunctionS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c8cc0cddd30..3637b3e9eb2 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -160,7 +160,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.keys = {configuration.url.key}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.uri.getPath(), true); + configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(configuration.url.uri.getPath()).getPath(), true); } void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) From b8cac9499d01bd51e4b8a669c7d23104c656dc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 26 Jul 2023 13:18:09 +0000 Subject: [PATCH 061/151] Add tests to reproduce the problem --- .../0_stateless/02833_local_with_dialect.reference | 2 ++ tests/queries/0_stateless/02833_local_with_dialect.sh | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02833_local_with_dialect.reference create mode 100755 tests/queries/0_stateless/02833_local_with_dialect.sh diff --git a/tests/queries/0_stateless/02833_local_with_dialect.reference b/tests/queries/0_stateless/02833_local_with_dialect.reference new file mode 100644 index 00000000000..dbb67375997 --- /dev/null +++ b/tests/queries/0_stateless/02833_local_with_dialect.reference @@ -0,0 +1,2 @@ +0 +[?2004h[?2004lBye. diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh new file mode 100755 index 00000000000..2a2e1b09459 --- /dev/null +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive From 3a6aaa29c9db0db1bc2875b7323c334148da387f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 26 Jul 2023 15:25:57 +0000 Subject: [PATCH 062/151] Do not load suggestions in case not ClickHouse dialects --- programs/local/LocalServer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 3c2a8ae3152..6ac7edaf1d9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -569,7 +569,9 @@ void LocalServer::processConfig() } print_stack_trace = config().getBool("stacktrace", false); - load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false); + const std::string clickhouse_dialect{"clickhouse"}; + load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) + && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; auto logging = (config().has("logger.console") || config().has("logger.level") From 5aeeaebd00c90385b1d9d8e3c6b13d298240c752 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jul 2023 17:09:23 -0300 Subject: [PATCH 063/151] init and destroy ares channel on demand.. --- src/Common/CaresPTRResolver.cpp | 71 ++++++++++--------- src/Common/CaresPTRResolver.h | 29 ++++---- .../tests/gtest_dns_reverse_resolve.cpp | 35 ++++----- 3 files changed, 71 insertions(+), 64 deletions(-) diff --git a/src/Common/CaresPTRResolver.cpp b/src/Common/CaresPTRResolver.cpp index fee4d01eb37..bf61e940745 100644 --- a/src/Common/CaresPTRResolver.cpp +++ b/src/Common/CaresPTRResolver.cpp @@ -41,9 +41,25 @@ namespace DB } } - std::mutex CaresPTRResolver::mutex; + struct AresChannelRAII + { + AresChannelRAII() + { + if (ares_init(&channel) != ARES_SUCCESS) + { + throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to initialize c-ares channel"); + } + } - CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr) + ~AresChannelRAII() + { + ares_destroy(channel); + } + + ares_channel channel; + }; + + CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) { /* * ares_library_init is not thread safe. Currently, the only other usage of c-ares seems to be in grpc. @@ -57,34 +73,22 @@ namespace DB * */ static const auto library_init_result = ares_library_init(ARES_LIB_INIT_ALL); - if (library_init_result != ARES_SUCCESS || ares_init(&channel) != ARES_SUCCESS) + if (library_init_result != ARES_SUCCESS) { throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to initialize c-ares"); } } - CaresPTRResolver::~CaresPTRResolver() - { - ares_destroy(channel); - /* - * Library initialization is currently done only once in the constructor. Multiple instances of CaresPTRResolver - * will be used in the lifetime of ClickHouse, thus it's problematic to have de-init here. - * In a practical view, it makes little to no sense to de-init a DNS library since DNS requests will happen - * until the end of the program. Hence, ares_library_cleanup() will not be called. 
- * */ - } - std::unordered_set CaresPTRResolver::resolve(const std::string & ip) { - std::lock_guard guard(mutex); + AresChannelRAII channel_raii; std::unordered_set ptr_records; - resolve(ip, ptr_records); + resolve(ip, ptr_records, channel_raii.channel); - if (!wait_and_process()) + if (!wait_and_process(channel_raii.channel)) { - cancel_requests(); throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip); } @@ -93,22 +97,21 @@ namespace DB std::unordered_set CaresPTRResolver::resolve_v6(const std::string & ip) { - std::lock_guard guard(mutex); + AresChannelRAII channel_raii; std::unordered_set ptr_records; - resolve_v6(ip, ptr_records); + resolve_v6(ip, ptr_records, channel_raii.channel); - if (!wait_and_process()) + if (!wait_and_process(channel_raii.channel)) { - cancel_requests(); throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip); } return ptr_records; } - void CaresPTRResolver::resolve(const std::string & ip, std::unordered_set & response) + void CaresPTRResolver::resolve(const std::string & ip, std::unordered_set & response, ares_channel channel) { in_addr addr; @@ -117,7 +120,7 @@ namespace DB ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET, callback, &response); } - void CaresPTRResolver::resolve_v6(const std::string & ip, std::unordered_set & response) + void CaresPTRResolver::resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel) { in6_addr addr; inet_pton(AF_INET6, ip.c_str(), &addr); @@ -125,15 +128,15 @@ namespace DB ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET6, callback, &response); } - bool CaresPTRResolver::wait_and_process() + bool CaresPTRResolver::wait_and_process(ares_channel channel) { int sockets[ARES_GETSOCK_MAXNUM]; pollfd pollfd[ARES_GETSOCK_MAXNUM]; while (true) { - auto readable_sockets = get_readable_sockets(sockets, pollfd); - auto timeout = calculate_timeout(); + auto readable_sockets = get_readable_sockets(sockets, pollfd, channel); + auto timeout = calculate_timeout(channel); int number_of_fds_ready = 0; if (!readable_sockets.empty()) @@ -158,11 +161,11 @@ namespace DB if (number_of_fds_ready > 0) { - process_readable_sockets(readable_sockets); + process_readable_sockets(readable_sockets, channel); } else { - process_possible_timeout(); + process_possible_timeout(channel); break; } } @@ -170,12 +173,12 @@ namespace DB return true; } - void CaresPTRResolver::cancel_requests() + void CaresPTRResolver::cancel_requests(ares_channel channel) { ares_cancel(channel); } - std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd) + std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel) { int sockets_bitmask = ares_getsock(channel, sockets, ARES_GETSOCK_MAXNUM); @@ -205,7 +208,7 @@ namespace DB return std::span(pollfd, number_of_sockets_to_poll); } - int64_t CaresPTRResolver::calculate_timeout() + int64_t CaresPTRResolver::calculate_timeout(ares_channel channel) { timeval tv; if (auto * tvp = ares_timeout(channel, nullptr, &tv)) @@ -218,14 +221,14 @@ namespace DB return 0; } - void CaresPTRResolver::process_possible_timeout() + void CaresPTRResolver::process_possible_timeout(ares_channel channel) { /* Call ares_process() unconditonally here, even if we simply timed out above, as otherwise the ares name resolve won't timeout! 
*/ ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD); } - void CaresPTRResolver::process_readable_sockets(std::span readable_sockets) + void CaresPTRResolver::process_readable_sockets(std::span readable_sockets, ares_channel channel) { for (auto readable_socket : readable_sockets) { diff --git a/src/Common/CaresPTRResolver.h b/src/Common/CaresPTRResolver.h index 454509ae43c..24a5e422ca8 100644 --- a/src/Common/CaresPTRResolver.h +++ b/src/Common/CaresPTRResolver.h @@ -28,32 +28,35 @@ namespace DB public: explicit CaresPTRResolver(provider_token); - ~CaresPTRResolver() override; + + /* + * Library initialization is currently done only once in the constructor. Multiple instances of CaresPTRResolver + * will be used in the lifetime of ClickHouse, thus it's problematic to have de-init here. + * In a practical view, it makes little to no sense to de-init a DNS library since DNS requests will happen + * until the end of the program. Hence, ares_library_cleanup() will not be called. + * */ + ~CaresPTRResolver() override = default; std::unordered_set resolve(const std::string & ip) override; std::unordered_set resolve_v6(const std::string & ip) override; private: - bool wait_and_process(); + bool wait_and_process(ares_channel channel); - void cancel_requests(); + void cancel_requests(ares_channel channel); - void resolve(const std::string & ip, std::unordered_set & response); + void resolve(const std::string & ip, std::unordered_set & response, ares_channel channel); - void resolve_v6(const std::string & ip, std::unordered_set & response); + void resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel); - std::span get_readable_sockets(int * sockets, pollfd * pollfd); + std::span get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel); - int64_t calculate_timeout(); + int64_t calculate_timeout(ares_channel channel); - void process_possible_timeout(); + void process_possible_timeout(ares_channel channel); - void process_readable_sockets(std::span readable_sockets); - - ares_channel channel; - - static std::mutex mutex; + void process_readable_sockets(std::span readable_sockets, ares_channel channel); }; } diff --git a/src/Common/tests/gtest_dns_reverse_resolve.cpp b/src/Common/tests/gtest_dns_reverse_resolve.cpp index 08351564eaf..de33deddac3 100644 --- a/src/Common/tests/gtest_dns_reverse_resolve.cpp +++ b/src/Common/tests/gtest_dns_reverse_resolve.cpp @@ -9,34 +9,35 @@ namespace DB { TEST(Common, ReverseDNS) { - auto addresses = std::vector({ - "8.8.8.8", "2001:4860:4860::8888", // dns.google - "142.250.219.35", // google.com - "157.240.12.35", // facebook - "208.84.244.116", "2600:1419:c400::214:c410", //www.terra.com.br, - "127.0.0.1", "::1" - }); - auto func = [&]() { // Good random seed, good engine auto rnd1 = std::mt19937(std::random_device{}()); - for (int i = 0; i < 50; ++i) + for (int i = 0; i < 10; ++i) { auto & dns_resolver_instance = DNSResolver::instance(); -// unfortunately, DNS cache can't be disabled because we might end up causing a DDoS attack -// dns_resolver_instance.setDisableCacheFlag(); + dns_resolver_instance.setDisableCacheFlag(); - auto addr_index = rnd1() % addresses.size(); + auto val1 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val2 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val3 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val4 = rnd1() % static_cast((pow(2, 31) - 1)); - [[maybe_unused]] auto result = dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ addresses[addr_index] }); + 
uint32_t ipv4_buffer[1] = { + static_cast(val1) + }; -// will not assert either because some of the IP addresses might change in the future and -// this test will become flaky -// ASSERT_TRUE(!result.empty()); + uint32_t ipv6_buffer[4] = { + static_cast(val1), + static_cast(val2), + static_cast(val3), + static_cast(val4) + }; + + dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ ipv4_buffer, sizeof(ipv4_buffer)}); + dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ ipv6_buffer, sizeof(ipv6_buffer)}); } - }; auto number_of_threads = 200u; From 6aab4cc83508093d19ed84130be483e73c08b324 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 05:25:40 +0200 Subject: [PATCH 064/151] Check for unexpected cyrillic --- src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/Disks/DiskEncrypted.cpp | 4 ++-- src/IO/examples/read_buffer.cpp | 4 ++-- src/IO/examples/write_buffer.cpp | 2 +- src/IO/examples/write_buffer_perf.cpp | 2 +- src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp | 2 +- utils/check-style/check-style | 3 +++ 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 3f4e35a78a4..34b621291fd 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -493,7 +493,7 @@ void CompressionCodecEncrypted::Configuration::loadImpl( /// If there is only one key with non zero ID, curren_key_id should be defined. if (new_params->keys_storage[method].size() == 1 && !new_params->keys_storage[method].contains(0)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. сurrent_key_id is required"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. current_key_id is required"); } /// Try to find which key will be used for encryption. If there is no current_key and only one key without id diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 441e639b967..aa9c4a92adc 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -433,10 +433,10 @@ void DiskEncrypted::applyNewSettings( { auto new_settings = parseDiskEncryptedSettings(name, config, config_prefix, disk_map); if (new_settings->wrapped_disk != delegate) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging wrapped disk on the fly is not supported. Disk {}", name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing wrapped disk on the fly is not supported. Disk {}", name); if (new_settings->disk_path != disk_path) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing disk path on the fly is not supported. 
Disk {}", name); current_settings.set(std::move(new_settings)); IDisk::applyNewSettings(config, context, config_prefix, disk_map); diff --git a/src/IO/examples/read_buffer.cpp b/src/IO/examples/read_buffer.cpp index 85675c0d613..221da24715b 100644 --- a/src/IO/examples/read_buffer.cpp +++ b/src/IO/examples/read_buffer.cpp @@ -40,7 +40,7 @@ int readAndPrint(DB::ReadBuffer & in) int main(int, char **) { { - std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'"; + std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'"; DB::ReadBufferFromString in(s); if (readAndPrint(in)) std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl; @@ -49,7 +49,7 @@ int main(int, char **) std::shared_ptr in; { - std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'"; + std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'"; in = std::make_shared(s); } if (readAndPrint(*in)) diff --git a/src/IO/examples/write_buffer.cpp b/src/IO/examples/write_buffer.cpp index bca0be24b1a..999f9b1bb34 100644 --- a/src/IO/examples/write_buffer.cpp +++ b/src/IO/examples/write_buffer.cpp @@ -14,7 +14,7 @@ int main(int, char **) { DB::Int64 a = -123456; DB::Float64 b = 123.456; - DB::String c = "вася пе\tтя"; + DB::String c = "вася pe\ttya"; DB::String d = "'xyz\\"; std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM diff --git a/src/IO/examples/write_buffer_perf.cpp b/src/IO/examples/write_buffer_perf.cpp index 0b3d0a61241..3f57ddb9a4f 100644 --- a/src/IO/examples/write_buffer_perf.cpp +++ b/src/IO/examples/write_buffer_perf.cpp @@ -14,7 +14,7 @@ int main(int, char **) { DB::Int64 a = -123456; DB::Float64 b = 123.456; - DB::String c = "вася пе\tтя"; + DB::String c = "вася pe\ttya"; DB::String d = "'xyz\\"; std::ofstream s("test"); diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp index 229a0630328..cff83b0ad3b 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp @@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate() else if (name == names[3]) { auto column = types[3]->createColumn(); - /// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). + /// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6"; assert_cast(*column).insertData(version.data(), version.size()); res.addColumn(std::move(column)); diff --git a/utils/check-style/check-style b/utils/check-style/check-style index c28ca1cfc8a..67c185a0b54 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -413,3 +413,6 @@ find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep - # Check for bad punctuation: whitespace before comma. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'" + +# Cyrillic characters hiding inside Latin. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place." 
From d35c87c1bdf4cba8848e94acd0ed8ec1f6b34502 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 05:06:16 +0000 Subject: [PATCH 065/151] allow positional options for clickhouse-local and populate global udf settings --- programs/local/LocalServer.cpp | 15 +++++++++++++++ src/Client/ClientBase.cpp | 5 ----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 3c2a8ae3152..3ee268aed0e 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -266,6 +266,10 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); // user's files are everywhere + std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); + global_context->setUserScriptsPath(user_scripts_path); + fs::create_directories(user_scripts_path); + /// top_level_domains_lists const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/"); if (!top_level_domains_path.empty()) @@ -490,6 +494,17 @@ try applyCmdSettings(global_context); + /// try to load user defined executable functions, throw on error and die + try + { + global_context->loadOrReloadUserDefinedExecutableFunctions(config()); + } + catch (...) + { + tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions."); + throw; + } + if (is_interactive) { clearTerminal(); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 3e964d5c6a3..496fc8fce0a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2624,11 +2624,6 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); } - /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). - unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); - po::store(parsed, options); } From d2d7139da3af470a49267047f0ea45f652d59e45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 08:58:23 +0200 Subject: [PATCH 066/151] Changelog for 23.7 --- CHANGELOG.md | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf6b309ef2c..878edfa4add 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
**[ClickHouse release v23.4, 2023-04-26](#234)**
@@ -9,6 +10,180 @@ # 2023 Changelog +### ClickHouse release 23.7, 2023-07-27 + +#### Backward Incompatible Change +* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after *downgrade* (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added support for PRQL as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). +* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. 
[#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). +* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). +* Implement support of encrypted elements in configuration file. Added possibility to use encrypted text in leaf elements of configuration file. The text is encrypted using encryption codecs from `` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). +* Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). +* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `input_format_csv_allow_variable_number_of_columns` options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). +* A system table `jemalloc_bins` to show stats for jemalloc bins. Example `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). +* Add `RowBinaryWithDefaults` format with extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added Peak Memory Usage statistic to HTTP headers. 
[#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). +* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Aliases `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). +* Enable automatic selection of the sparse serialization format by default. It improves performance. 
The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). +* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). +* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). +* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. 
Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. 
[#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). +* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). +* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for function `range` of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* Cache dictionary now requests only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed the case when settings were not applied for EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. 
Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Log messages are written to the `system.text_log` from the server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. 
[#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). +* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). + +#### Experimental Feature +* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. +* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add integration test check with the enabled Analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. 
[#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)).
+* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)).
+* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)).
+* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)).
+* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)).
+* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix segfault when creating an invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). +* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). 
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release 23.6, 2023-06-29 #### Backward Incompatible Change From ac51ade45c5581d031c382277b550b2fb2f873fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 27 Jul 2023 08:45:53 +0000 Subject: [PATCH 067/151] Do not run the test without Rust libraries --- tests/queries/0_stateless/02833_local_with_dialect.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh index 2a2e1b09459..012a6d91269 100755 --- a/tests/queries/0_stateless/02833_local_with_dialect.sh +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 2b18872e86898fe0c0ee40ddecf05c29088a7aca Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jul 2023 08:55:19 +0000 Subject: [PATCH 068/151] Incorporate review feedback --- docs/en/operations/system-tables/query_log.md | 8 ++++---- src/Interpreters/Cache/QueryCache.h | 8 ++++---- src/Interpreters/QueryLog.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index b8dc0c0224c..835c79129de 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -112,10 +112,10 @@ Columns: - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. 
- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - - `'None' = 1` = The query result was neither written into nor read from the query cache. - - `'Write' = 1` = The query result was written into the query cache. - - `'Read' = 1` = The query result was read from the query cache. - - `'Unknown' = 1` = Unknown status. + - `'Unknown' = 1` = Status unknown. + - `'None' = 2` = The query result was neither written into nor read from the query cache. + - `'Write' = 3` = The query result was written into the query cache. + - `'Read' = 4` = The query result was read from the query cache. **Example** diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 973015b8003..5fe756268f2 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -27,10 +27,10 @@ public: enum class Usage { /// starts at 1 for compatibitity with DataTypeEnum8 - None = 1, /// query result neither written nor read into/from query cache - Write, /// query result wrote into query cache - Read, /// query result read from query cache - Unknown, /// we don't know what what happened + Unknown = 1, /// we don't know what what happened + None, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache }; /// Represents a query result in the cache. diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index c3294512f14..df21e82305a 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -44,10 +44,10 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() auto query_cache_usage_datatype = std::make_shared( DataTypeEnum8::Values { + {"Unknown", static_cast(QueryCache::Usage::Unknown)}, {"None", static_cast(QueryCache::Usage::None)}, {"Write", static_cast(QueryCache::Usage::Write)}, - {"Read", static_cast(QueryCache::Usage::Read)}, - {"Unknown", static_cast(QueryCache::Usage::Unknown)} + {"Read", static_cast(QueryCache::Usage::Read)} }); auto low_cardinality_string = std::make_shared(std::make_shared()); From 043ad45ec3f4f2d1ff8b619da4c6ec5cb2b8f0d0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jul 2023 09:18:24 +0000 Subject: [PATCH 069/151] Incorporate review feedback, pt. II --- docs/en/operations/system-tables/query_log.md | 8 ++++---- src/Interpreters/Cache/QueryCache.h | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 835c79129de..c6f565b8748 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -112,10 +112,10 @@ Columns: - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. - `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - - `'Unknown' = 1` = Status unknown. - - `'None' = 2` = The query result was neither written into nor read from the query cache. - - `'Write' = 3` = The query result was written into the query cache. 
- - `'Read' = 4` = The query result was read from the query cache. + - `'Unknown'` = Status unknown. + - `'None'` = The query result was neither written into nor read from the query cache. + - `'Write'` = The query result was written into the query cache. + - `'Read'` = The query result was read from the query cache. **Example** diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 5fe756268f2..c2de8ca22dd 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -26,11 +26,10 @@ class QueryCache public: enum class Usage { - /// starts at 1 for compatibitity with DataTypeEnum8 - Unknown = 1, /// we don't know what what happened - None, /// query result neither written nor read into/from query cache - Write, /// query result written into query cache - Read, /// query result read from query cache + Unknown, /// we don't know what what happened + None, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache }; /// Represents a query result in the cache. From 578ea9c5e8a0e9eed4da2afa15c84951aba9d688 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 27 Jul 2023 09:50:42 +0000 Subject: [PATCH 070/151] Update documentation --- docs/en/operations/backup.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 62f931a76b4..63bf3cfeb5c 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables + - `s3_storage_class`: the storage class used for S3 backup. 
For example, `STANDARD` ### Usage examples From 04361908e67380cdea7c185b28d8f056330424af Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 27 Jul 2023 10:36:02 +0000 Subject: [PATCH 071/151] Fix test_keeper_reconfig_replace_leader --- tests/integration/test_keeper_reconfig_replace_leader/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_keeper_reconfig_replace_leader/test.py b/tests/integration/test_keeper_reconfig_replace_leader/test.py index ca1ec3a0c92..ef1d5394b67 100644 --- a/tests/integration/test_keeper_reconfig_replace_leader/test.py +++ b/tests/integration/test_keeper_reconfig_replace_leader/test.py @@ -83,6 +83,8 @@ def test_reconfig_replace_leader(started_cluster): assert "node3" in config assert "node4" not in config + ku.wait_configs_equal(config, zk2) + with pytest.raises(Exception): zk1.stop() zk1.close() From 228de12d94a206f6eaae74059216886c32c2b53e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 27 Jul 2023 13:59:23 +0300 Subject: [PATCH 072/151] Update ReplicatedMergeTreeQueue.cpp (#52648) --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index e11913fc3d2..21d5597e614 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1803,7 +1803,7 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo Int64 part_data_version = part->info.getDataVersion(); Int64 part_metadata_version = part->getMetadataVersion(); - LOG_DEBUG(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); + LOG_TEST(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); std::map result; /// Here we return mutation commands for part which has bigger alter version than part metadata version. From 9d73be6fca63edbca0bd97d07386f41268f1b11d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 12:14:46 +0000 Subject: [PATCH 073/151] test is added --- .../0_stateless/02833_local_udf_options.reference | 1 + tests/queries/0_stateless/02833_local_udf_options.sh | 11 +++++++++++ tests/queries/0_stateless/scripts_udf/function.xml | 9 +++++++++ tests/queries/0_stateless/scripts_udf/udf.sh | 3 +++ 4 files changed, 24 insertions(+) create mode 100755 tests/queries/0_stateless/02833_local_udf_options.reference create mode 100755 tests/queries/0_stateless/02833_local_udf_options.sh create mode 100644 tests/queries/0_stateless/scripts_udf/function.xml create mode 100755 tests/queries/0_stateless/scripts_udf/udf.sh diff --git a/tests/queries/0_stateless/02833_local_udf_options.reference b/tests/queries/0_stateless/02833_local_udf_options.reference new file mode 100755 index 00000000000..19f0805d8de --- /dev/null +++ b/tests/queries/0_stateless/02833_local_udf_options.reference @@ -0,0 +1 @@ +qwerty diff --git a/tests/queries/0_stateless/02833_local_udf_options.sh b/tests/queries/0_stateless/02833_local_udf_options.sh new file mode 100755 index 00000000000..149b62d7e2c --- /dev/null +++ b/tests/queries/0_stateless/02833_local_udf_options.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +SCRIPTS_DIR=$CUR_DIR/scripts_udf + +$CLICKHOUSE_LOCAL -q 'select test_function()' -- --user_scripts_path=$SCRIPTS_DIR --user_defined_executable_functions_config=$SCRIPTS_DIR/function.xml diff --git a/tests/queries/0_stateless/scripts_udf/function.xml b/tests/queries/0_stateless/scripts_udf/function.xml new file mode 100644 index 00000000000..69a0abb5cec --- /dev/null +++ b/tests/queries/0_stateless/scripts_udf/function.xml @@ -0,0 +1,9 @@ + + + executable + test_function + String + TabSeparated + udf.sh + + diff --git a/tests/queries/0_stateless/scripts_udf/udf.sh b/tests/queries/0_stateless/scripts_udf/udf.sh new file mode 100755 index 00000000000..add85833c3e --- /dev/null +++ b/tests/queries/0_stateless/scripts_udf/udf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +echo qwerty From f3dc6dd061515054afbbe5c58452a9554998a8b7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 27 Jul 2023 15:23:57 +0300 Subject: [PATCH 074/151] Revert "Added field `refcount` to `system.remote_data_paths` table" --- src/Disks/IDisk.h | 5 +- .../ObjectStorages/DiskObjectStorage.cpp | 2 +- .../System/StorageSystemRemoteDataPaths.cpp | 8 +--- .../02791_remote_paths_refcount.reference | 28 ----------- .../02791_remote_paths_refcount.sql | 47 ------------------- 5 files changed, 4 insertions(+), 86 deletions(-) delete mode 100644 tests/queries/0_stateless/02791_remote_paths_refcount.reference delete mode 100644 tests/queries/0_stateless/02791_remote_paths_refcount.sql diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index a2c5e59237f..2b0ca369a96 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -303,11 +303,10 @@ public: std::string local_path; std::string common_prefix_for_objects; StoredObjects objects; - size_t refcount; LocalPathWithObjectStoragePaths( - const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_, size_t refcount_) - : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)), refcount(refcount_) {} + const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) + : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} }; virtual void getRemotePathsRecursive(const String &, std::vector &) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 001cff4cefe..762151b3808 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -82,7 +82,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: { try { - paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path), metadata_storage->getHardlinkCount(local_path)); + paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path)); } catch (const Exception & e) { diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 820b1cf3823..eb514d3b3f4 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -25,7 +25,6 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab {"local_path", std::make_shared()}, {"remote_path", std::make_shared()}, {"size", std::make_shared()}, - {"refcount", std::make_shared()}, 
{"common_prefix_for_blobs", std::make_shared()}, {"cache_paths", std::make_shared(std::make_shared())}, })); @@ -49,7 +48,6 @@ Pipe StorageSystemRemoteDataPaths::read( MutableColumnPtr col_local_path = ColumnString::create(); MutableColumnPtr col_remote_path = ColumnString::create(); MutableColumnPtr col_size = ColumnUInt64::create(); - MutableColumnPtr col_refcount = ColumnUInt64::create(); MutableColumnPtr col_namespace = ColumnString::create(); MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create()); @@ -67,22 +65,19 @@ Pipe StorageSystemRemoteDataPaths::read( if (disk->supportsCache()) cache = FileCacheFactory::instance().getByName(disk->getCacheName()).cache; - for (const auto & [local_path, common_prefox_for_objects, storage_objects, refcount] : remote_paths_by_local_path) + for (const auto & [local_path, common_prefox_for_objects, storage_objects] : remote_paths_by_local_path) { for (const auto & object : storage_objects) { col_disk_name->insert(disk_name); col_base_path->insert(disk->getPath()); - if (cache) col_cache_base_path->insert(cache->getBasePath()); else col_cache_base_path->insertDefault(); - col_local_path->insert(local_path); col_remote_path->insert(object.remote_path); col_size->insert(object.bytes_size); - col_refcount->insert(refcount); col_namespace->insert(common_prefox_for_objects); if (cache) @@ -106,7 +101,6 @@ Pipe StorageSystemRemoteDataPaths::read( res_columns.emplace_back(std::move(col_local_path)); res_columns.emplace_back(std::move(col_remote_path)); res_columns.emplace_back(std::move(col_size)); - res_columns.emplace_back(std::move(col_refcount)); res_columns.emplace_back(std::move(col_namespace)); res_columns.emplace_back(std::move(col_cache_paths)); diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.reference b/tests/queries/0_stateless/02791_remote_paths_refcount.reference deleted file mode 100644 index 56fb1536205..00000000000 --- a/tests/queries/0_stateless/02791_remote_paths_refcount.reference +++ /dev/null @@ -1,28 +0,0 @@ -0_0_0_0 0 -0_0_0_0_1 1 -1_0_0_0 0 -1_0_0_0_1 1 -0_0_0_0_1 checksums.txt 0 -0_0_0_0_1 columns.txt 1 -0_0_0_0_1 count.txt 1 -0_0_0_0_1 default_compression_codec.txt 1 -0_0_0_0_1 id.bin 1 -0_0_0_0_1 id.cmrk2 1 -0_0_0_0_1 metadata_version.txt 1 -0_0_0_0_1 minmax_id.idx 1 -0_0_0_0_1 partition.dat 1 -0_0_0_0_1 primary.cidx 1 -0_0_0_0_1 v.bin 1 -0_0_0_0_1 v.cmrk2 1 -1_0_0_0_1 checksums.txt 0 -1_0_0_0_1 columns.txt 0 -1_0_0_0_1 count.txt 1 -1_0_0_0_1 default_compression_codec.txt 0 -1_0_0_0_1 id.bin 1 -1_0_0_0_1 id.cmrk2 1 -1_0_0_0_1 metadata_version.txt 0 -1_0_0_0_1 minmax_id.idx 1 -1_0_0_0_1 partition.dat 1 -1_0_0_0_1 primary.cidx 1 -1_0_0_0_1 v.bin 0 -1_0_0_0_1 v.cmrk2 0 diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.sql b/tests/queries/0_stateless/02791_remote_paths_refcount.sql deleted file mode 100644 index 180601738ad..00000000000 --- a/tests/queries/0_stateless/02791_remote_paths_refcount.sql +++ /dev/null @@ -1,47 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS t_refcount SYNC; - --- Names of parts (on which this test depends) --- can differ in case of fault injection. 
-SET insert_keeper_fault_injection_probability = 0.0; - -CREATE TABLE t_refcount (id UInt64, v UInt64) -ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t_refcount', '1') -ORDER BY id PARTITION BY id % 2 -SETTINGS - storage_policy = 's3_cache', - allow_remote_fs_zero_copy_replication = 1, - min_bytes_for_wide_part = 0, - compress_marks = 1, - compress_primary_key = 1, - ratio_of_defaults_for_sparse_serialization = 1.0; - -INSERT INTO t_refcount VALUES (1, 10), (2, 20); - -SET mutations_sync = 2; -ALTER TABLE t_refcount UPDATE v = v * 10 WHERE id % 2 = 1; - -SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 't_refcount' ORDER BY name; - -WITH splitByChar('/', full_path) AS path_parts -SELECT path_parts[-2] AS part_name, path_parts[-1] AS file_name, refcount -FROM -( - SELECT - path || local_path AS full_path, - substring(full_path, 1, length(full_path) - position(reverse(full_path), '/') + 1) AS part_path, - refcount - FROM system.remote_data_paths - WHERE disk_name = 's3_cache' -) AS paths -INNER JOIN -( - SELECT path - FROM system.parts - WHERE database = currentDatabase() AND table = 't_refcount' AND active -) AS parts -ON paths.part_path = parts.path -ORDER BY part_name, file_name; - -DROP TABLE IF EXISTS t_refcount SYNC; From 8bba7baeaa65548d91da4c068c6af9b583f9449c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 12:34:22 +0000 Subject: [PATCH 075/151] fix style --- tests/queries/0_stateless/02833_local_udf_options.reference | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/02833_local_udf_options.reference diff --git a/tests/queries/0_stateless/02833_local_udf_options.reference b/tests/queries/0_stateless/02833_local_udf_options.reference old mode 100755 new mode 100644 From 33300a978e00687713d08fa786178eecc7bc15d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 15:38:53 +0300 Subject: [PATCH 076/151] Update CHANGELOG (#52655) --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 878edfa4add..f401b346726 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ * The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### New Feature -* Added support for PRQL as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). * Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). 
* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). * Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). @@ -115,6 +115,7 @@ * Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). #### Experimental Feature +* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. * (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). * (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). From 541a253580b968e99c8ba9419ecf82d7459a4768 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 27 Jul 2023 12:43:38 +0000 Subject: [PATCH 077/151] fix crash with function tuple and one sparse argument --- src/DataTypes/DataTypeTuple.h | 1 + src/DataTypes/IDataType.h | 1 + src/Functions/IFunction.cpp | 2 +- .../02833_sparse_columns_tuple_function.reference | 4 ++++ .../02833_sparse_columns_tuple_function.sql | 14 ++++++++++++++ 5 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference create mode 100644 tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index ea05e6ae59b..0bf3f3ac8b3 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -37,6 +37,7 @@ public: bool canBeInsideNullable() const override { return false; } bool supportsSparseSerialization() const override { return true; } + bool canBeInsideSparseColumns() const override { return false; } MutableColumnPtr createColumn() const override; MutableColumnPtr createColumn(const ISerialization & serialization) const override; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 4adafe5d212..54cb3d0d5c2 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -110,6 +110,7 @@ public: /// TODO: support more types. 
virtual bool supportsSparseSerialization() const { return !haveSubtypes(); }
+    virtual bool canBeInsideSparseColumns() const { return supportsSparseSerialization(); }
 
     SerializationPtr getDefaultSerialization() const;
     SerializationPtr getSparseSerialization() const;
diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp
index 650b54d9a37..d119b15733b 100644
--- a/src/Functions/IFunction.cpp
+++ b/src/Functions/IFunction.cpp
@@ -361,7 +361,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
     /// If default of sparse column is changed after execution of function, convert to full column.
     /// If there are any default in non-zero position after execution of function, convert to full column.
     /// Currently there is no easy way to rebuild sparse column with new offsets.
-    if (!result_type->supportsSparseSerialization() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
+    if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
     {
         const auto & offsets_data = assert_cast &>(*sparse_offsets).getData();
         return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1);
diff --git a/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference
new file mode 100644
index 00000000000..85573e2ed49
--- /dev/null
+++ b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference
@@ -0,0 +1,4 @@
+(0,0)
+(0,0)
+(0,1)
+(0,NULL)
diff --git a/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql
new file mode 100644
index 00000000000..776dd35ddba
--- /dev/null
+++ b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql
@@ -0,0 +1,14 @@
+drop table if exists t_tuple_sparse;
+
+create table t_tuple_sparse (a UInt64, b UInt64)
+ENGINE = MergeTree ORDER BY tuple()
+SETTINGS ratio_of_defaults_for_sparse_serialization = 0.0;
+
+insert into t_tuple_sparse values (0, 0);
+
+select (a, b) from t_tuple_sparse;
+select (a, 0) from t_tuple_sparse;
+select (a, 1) from t_tuple_sparse;
+select (a, NULL) from t_tuple_sparse;
+
+drop table if exists t_tuple_sparse;

From 78f3a575f9ddbfd47e46e8169b63979e3d2aa72f Mon Sep 17 00:00:00 2001
From: Jiebin Sun
Date: Thu, 27 Jul 2023 21:06:34 +0800
Subject: [PATCH 078/151] Convert hashSets in parallel before merge (#50748)

* Convert hashSets in parallel before merge

Before merge, if one of the lhs and rhs is singleLevelSet and the other
is twoLevelSet, then the SingleLevelSet will call convertToTwoLevel().
The convert process is not in parallel, and it will cost lots of cycles
if it has to convert all the singleLevelSets.

The idea of the patch is to convert all the singleLevelSets to
twoLevelSets in parallel if the hashsets are not all singleLevel or not
all twoLevel.

I have tested the patch on an Intel 2 x 112 vCPUs SPR server with
ClickBench and the latest upstream ClickHouse.
Q5 has got a big 264% performance improvement and 24 queries have got at
least 5% performance gain. The overall geomean of 43 queries has gained
7.4% more than the base code.

Signed-off-by: Jiebin Sun

* add resize() for the data_vec in parallelizeMergePrepare()

Signed-off-by: Jiebin Sun

* Add the performance test prepare_hash_before_merge.xml

Signed-off-by: Jiebin Sun

* Fit the CI to rename the data set from hits_v1 to test.hits.
Signed-off-by: Jiebin Sun * remove the redundant branch in UniqExactSet Co-authored-by: Nikita Taranov * Remove the empty methods and add throw exception in parallelizeMergePrepare() Signed-off-by: Jiebin Sun --------- Signed-off-by: Jiebin Sun Co-authored-by: Nikita Taranov --- .../AggregateFunctionUniq.h | 39 ++++++++++++++ src/AggregateFunctions/IAggregateFunction.h | 8 +++ src/AggregateFunctions/UniqExactSet.h | 51 +++++++++++++++++++ src/Interpreters/Aggregator.cpp | 14 +++++ .../performance/prepare_hash_before_merge.xml | 4 ++ 5 files changed, 116 insertions(+) create mode 100644 tests/performance/prepare_hash_before_merge.xml diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index de68e9076a0..2810051a82f 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -29,6 +29,10 @@ #include #include +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} namespace DB { @@ -42,6 +46,7 @@ struct AggregateFunctionUniqUniquesHashSetData Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniq"; } @@ -55,6 +60,7 @@ struct AggregateFunctionUniqUniquesHashSetDataForVariadic Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -72,6 +78,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -84,6 +91,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -96,6 +104,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -108,6 +117,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -120,6 +130,7 @@ struct AggregateFunctionUniqHLL12DataForVariadic Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -143,6 +154,7 @@ struct AggregateFunctionUniqExactData Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = false; static String getName() { return "uniqExact"; } @@ -162,6 +174,7 @@ struct 
AggregateFunctionUniqExactData Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = false; static String getName() { return "uniqExact"; } @@ -181,6 +194,7 @@ struct AggregateFunctionUniqExactData Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = false; static String getName() { return "uniqExact"; } @@ -190,6 +204,7 @@ template { constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -204,6 +219,7 @@ struct AggregateFunctionUniqThetaData Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqTheta"; } @@ -213,6 +229,7 @@ template struct AggregateFunctionUniqThetaDataForVariadic : AggregateFunctionUniqThetaData { constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -384,8 +401,10 @@ template class AggregateFunctionUniq final : public IAggregateFunctionDataHelper> { private: + using DataSet = typename Data::Set; static constexpr size_t num_args = 1; static constexpr bool is_able_to_parallelize_merge = Data::is_able_to_parallelize_merge; + static constexpr bool is_parallelize_merge_prepare_needed = Data::is_parallelize_merge_prepare_needed; public: explicit AggregateFunctionUniq(const DataTypes & argument_types_) @@ -439,6 +458,26 @@ public: detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } + bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed;} + + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool) const override + { + if constexpr (is_parallelize_merge_prepare_needed) + { + std::vector data_vec; + data_vec.resize(places.size()); + + for (unsigned long i = 0; i < data_vec.size(); i++) + data_vec[i] = &this->data(places[i]).set; + + DataSet::parallelizeMergePrepare(data_vec, thread_pool); + } + else + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() is only implemented when is_parallelize_merge_prepare_needed is true for {} ", getName()); + } + } + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).set.merge(this->data(rhs).set); diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index df08b6f2109..b460a66ea22 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -47,6 +47,7 @@ using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; using AggregateDataPtr = char *; +using AggregateDataPtrs = std::vector; using ConstAggregateDataPtr = const char *; class IAggregateFunction; @@ -148,6 +149,13 @@ public: /// Default values must be a the 0-th 
positions in columns. virtual void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t length, Arena * arena) const = 0; + virtual bool isParallelizeMergePrepareNeeded() const { return false; } + + virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName()); + } + /// Merges state (on which place points to) with other state of current aggregation function. virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const = 0; diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 90cfe700179..0d99b29686f 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -28,6 +28,57 @@ public: asTwoLevel().insert(std::forward(arg)); } + /// In merge, if one of the lhs and rhs is twolevelset and the other is singlelevelset, then the singlelevelset will need to convertToTwoLevel(). + /// It's not in parallel and will cost extra large time if the thread_num is large. + /// This method will convert all the SingleLevelSet to TwoLevelSet in parallel if the hashsets are not all singlelevel or not all twolevel. + static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool) + { + unsigned long single_level_set_num = 0; + + for (auto ele : data_vec) + { + if (ele->isSingleLevel()) + single_level_set_num ++; + } + + if (single_level_set_num > 0 && single_level_set_num < data_vec.size()) + { + try + { + auto data_vec_atomic_index = std::make_shared(0); + auto thread_func = [data_vec, data_vec_atomic_index, thread_group = CurrentThread::getGroup()]() + { + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + + setThreadName("UniqExaConvert"); + + while (true) + { + const auto i = data_vec_atomic_index->fetch_add(1); + if (i >= data_vec.size()) + return; + if (data_vec[i]->isSingleLevel()) + data_vec[i]->convertToTwoLevel(); + } + }; + for (size_t i = 0; i < std::min(thread_pool.getMaxThreads(), single_level_set_num); ++i) + thread_pool.scheduleOrThrowOnError(thread_func); + + thread_pool.wait(); + } + catch (...) + { + thread_pool.wait(); + throw; + } + } + } + auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr) { if (isSingleLevel() && other.isTwoLevel()) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 36cd32910b5..c2914c938b5 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2603,6 +2603,20 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( AggregatedDataVariantsPtr & res = non_empty_data[0]; + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if (aggregate_functions[i]->isParallelizeMergePrepareNeeded()) + { + size_t size = non_empty_data.size(); + std::vector data_vec; + + for (size_t result_num = 0; result_num < size; ++result_num) + data_vec.emplace_back(non_empty_data[result_num]->without_key + offsets_of_aggregate_states[i]); + + aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool); + } + } + /// We merge all aggregation results to the first. 
for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { diff --git a/tests/performance/prepare_hash_before_merge.xml b/tests/performance/prepare_hash_before_merge.xml new file mode 100644 index 00000000000..e99f762927f --- /dev/null +++ b/tests/performance/prepare_hash_before_merge.xml @@ -0,0 +1,4 @@ + + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 24 + SELECT COUNT(DISTINCT Referer) FROM test.hits SETTINGS max_threads = 22 + From 671128140dc6672349421c84643076410d46ce0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 15:34:05 +0200 Subject: [PATCH 079/151] Update autogenerated version to 23.8.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 ++++---- .../StorageSystemContributors.generated.cpp | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 821b7b46855..9919d018046 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54476) +SET(VERSION_REVISION 54477) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df) -SET(VERSION_DESCRIBE v23.7.1.1-testing) -SET(VERSION_STRING 23.7.1.1) +SET(VERSION_GITHASH a70127baecc451f1f7073bad7b6198f6703441d8) +SET(VERSION_DESCRIBE v23.8.1.1-testing) +SET(VERSION_STRING 23.8.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f84c554afc0..031c7454ab6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -42,10 +42,12 @@ const char * auto_contributors[] { "Alex", "Alex Bocharov", "Alex Cao", + "Alex Cheng", "Alex Karo", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", + "AlexBykovski", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", @@ -232,6 +234,7 @@ const char * auto_contributors[] { "CheSema", "Chebarykov Pavel", "Chen Yufei", + "Chen768959", "Cheng Pan", "Chienlung Cheung", "Christian", @@ -485,6 +488,7 @@ const char * auto_contributors[] { "John", "John Hummel", "John Skopis", + "John Spurlock", "Jonatas Freitas", "Jonathan-Ackerman", "Jordi", @@ -659,6 +663,7 @@ const char * auto_contributors[] { "Mikhail Gaidamaka", "Mikhail Guzov", "Mikhail Korotov", + "Mikhail Koviazin", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", @@ -815,6 +820,7 @@ const char * auto_contributors[] { "Roman Vasin", "Roman Vlasenko", "Roman Zhukov", + "Rory Crispin", "Roy Bellingan", "Ruslan", "Ruslan Savchenko", @@ -832,7 +838,9 @@ const char * auto_contributors[] { "Salvatore Mesoraca", "Sami Kerola", "Samuel Chou", + "Samuel Colvin", "San", + "Sanjam Panda", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -883,6 +891,7 @@ const char * auto_contributors[] { "SmitaRKulkarni", "Snow", "Sofia Antipushina", + "Song Liyong", "Sorck", "Stanislav Dobrovolschii", "Stanislav Pavlovichev", @@ -893,6 +902,7 @@ const char * auto_contributors[] { "Stepan Herold", "Stephan", "Steve-金勇", + "StianBerger", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", @@ -977,6 +987,7 @@ const char * auto_contributors[] { "Vitaliy Karnienko", "Vitaliy Kozlovskiy", 
"Vitaliy Lyudvichenko", + "Vitaliy Pashkov", "Vitaliy Zakaznikov", "Vitaly", "Vitaly Artemyev", @@ -1029,6 +1040,7 @@ const char * auto_contributors[] { "Yakov Olkhovskiy", "YalalovSM", "Yangkuan Liu", + "Yarik Briukhovetskyi", "Yatian Xu", "Yatsishin Ilya", "Yağızcan Değirmenci", @@ -1053,6 +1065,7 @@ const char * auto_contributors[] { "Yury Karpovich", "Yury Stankevich", "Yusuke Tanaka", + "Zach Naimon", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1143,6 +1156,7 @@ const char * auto_contributors[] { "changvvb", "chasingegg", "chen", + "chen768959", "chen9t", "chengy8934", "chenjian", @@ -1179,6 +1193,7 @@ const char * auto_contributors[] { "detailyang", "dfenelonov", "dgrr", + "dheerajathrey", "dimarub2000", "dinosaur", "divanorama", @@ -1329,6 +1344,7 @@ const char * auto_contributors[] { "lanfz", "larryluogit", "laurieliyang", + "lcjh", "lehasm", "leosunli", "leozhang", @@ -1455,6 +1471,7 @@ const char * auto_contributors[] { "pawelsz-rb", "pdai", "pdv-ru", + "pedro.riera", "pengxiangcai", "peshkurov", "peter279k", @@ -1548,8 +1565,10 @@ const char * auto_contributors[] { "teng.ma", "terrylin", "tesw yew isal", + "therealnick233", "tianzhou", "tiger.yan", + "timfursov", "tison", "topvisor", "tpanetti", @@ -1563,6 +1582,7 @@ const char * auto_contributors[] { "usurai", "vahid-sohrabloo", "vdimir", + "velavokr", "velom", "vesslanjin", "vgocoder", @@ -1587,17 +1607,21 @@ const char * auto_contributors[] { "wuxiaobai24", "wzl", "xPoSx", + "xiao", + "xiaolei565", "xiedeyantu", "xieyichen", "xinhuitian", "xlwh", "xmy", + "xuelei", "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", "yangshuai", "yaqi-zhao", + "yariks5s", "yeer", "ygrek", "yhgcn", From 28c49e3f20923917c46872b15dce21a72ab47b4a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 27 Jul 2023 13:47:11 +0000 Subject: [PATCH 080/151] Update version_date.tsv and changelogs after v23.7.1.2470-stable --- SECURITY.md | 3 +- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.7.1.2470-stable.md | 452 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 458 insertions(+), 4 deletions(-) create mode 100644 docs/changelogs/v23.7.1.2470-stable.md diff --git a/SECURITY.md b/SECURITY.md index 4ba5f13d09c..d61533b44b9 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.7 | ✔️ | | 23.6 | ✔️ | | 23.5 | ✔️ | -| 23.4 | ✔️ | +| 23.4 | ❌ | | 23.3 | ✔️ | | 23.2 | ❌ | | 23.1 | ❌ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 8a6324aef88..c9800e4e66d 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.1.2470" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7f453627601..f558338b23c 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.1.2470" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 1fa7b83ae16..156de034a7f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.1.2470" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.7.1.2470-stable.md b/docs/changelogs/v23.7.1.2470-stable.md new file mode 100644 index 00000000000..a77078cb653 --- /dev/null +++ b/docs/changelogs/v23.7.1.2470-stable.md @@ -0,0 +1,452 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.1.2470-stable (a70127baecc) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Backward Incompatible Change +* Add ` NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after **downgrade** (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). 
[#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The function `toDecimalString` is removed due to subpar implementation quality. This closes [#52407](https://github.com/ClickHouse/ClickHouse/issues/52407). [#52450](https://github.com/ClickHouse/ClickHouse/pull/52450) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+* Implement KQL-style formatting for Interval. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)).
+* Support the ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration, which can be enabled via the `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)).
+* The Kafka connector can fetch the Avro schema from the schema registry with basic authentication using URL-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added support for PRQL as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
+* Implement support for encrypted elements in the configuration file. Added the possibility to use encrypted text in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `encryption_codecs` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)).
+* Just a new request of [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)).
+* Add the SYSTEM STOP LISTEN query. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add the `input_format_csv_allow_variable_number_of_columns` option (see the sketch below). [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Another boring feature: add function substring_index, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)).
+* Show stats for jemalloc bins. Example: ``` SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10 ```. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)).
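+
+A minimal sketch of the `input_format_csv_allow_variable_number_of_columns` option from the entry above, assuming the three-argument form of the `format` table function; the inline data and column names are hypothetical:
+
+```sql
+-- The second CSV row is missing column `b`; with the option enabled it is
+-- filled with the column's default value instead of raising a parse error.
+SELECT *
+FROM format(CSV, 'a UInt32, b String', '1,one\n2')
+SETTINGS input_format_csv_allow_variable_number_of_columns = 1;
+```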
+* Add RowBinaryWithDefaults format with extra byte before each column for using column default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new initcap / initcapUTF8 functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added Peak Memory Usage (for query) to client final statistics, and to http header. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new hasSubsequence() (+CaseInsensitive + UTF8 versions) functions. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Re-add SipHash keyed functions. [#52206](https://github.com/ClickHouse/ClickHouse/pull/52206) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). + +#### Performance Improvement +* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). 
+* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). +* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). +* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Remove duplicate condition in functionunixtimestamp64.h. [#51857](https://github.com/ClickHouse/ClickHouse/pull/51857) ([lcjh](https://github.com/ljhcage)). +* The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* 1. Add rewriter for both old and new analyzer. 2. Add settings `optimize_uniq_to_count` which default is 0. [#52004](https://github.com/ClickHouse/ClickHouse/pull/52004) ([JackyWoo](https://github.com/JackyWoo)). +* The performance experiments of **OnTime** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **11.6%** to the QPS of the query **Q8** while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in FileCache::loadMetadata. 
[#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues: ``` select geohashEncode(120.2, number::Float64) from numbers(10);. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow filtering HTTP headers with `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* #50727 new alias for function current_database and added new function current_schemas. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Log async insert flush queries into to system.query_log. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. 
[#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). +* Fix checking error caused by uninitialized class members. [#51418](https://github.com/ClickHouse/ClickHouse/pull/51418) ([李扬](https://github.com/taiyang-li)). +* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correcting the message of modify storage policy https://github.com/clickhouse/clickhouse/issues/51516 ### documentation entry for user-facing changes. [#51519](https://github.com/ClickHouse/ClickHouse/pull/51519) ([xiaolei565](https://github.com/xiaolei565)). +* Support `DROP FILESYSTEM CACHE KEY [ OFFSET ]`. [#51547](https://github.com/ClickHouse/ClickHouse/pull/51547) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add column modification time into system.parts_columns. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). +* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. 
[#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for function range of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of statement SHOW INDEX. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* CacheDictionary request only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed settings not applied for explain query when format provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixed crash when mysqlxx::Pool::Entry is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* CREATE TABLE ... AS SELECT .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). 
+* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Log messages are written to text_log from the beginning. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception will be thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Support async_deduplication_token for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Avro input format support Union with single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). This is defaulted to false until [#52075](https://github.com/ClickHouse/ClickHouse/issues/52075) is fixed. [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. [#52181](https://github.com/ClickHouse/ClickHouse/pull/52181) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now unquoted utf-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093) . [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). 
+* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52419](https://github.com/ClickHouse/ClickHouse/pull/52419) ([alesapin](https://github.com/alesapin)). +* The `session_timezone` setting is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). +* Added field `refcount` to `system.remote_data_paths` table. [#52518](https://github.com/ClickHouse/ClickHouse/pull/52518) ([Anton Popov](https://github.com/CurtizJ)). +* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed CRC32(WeakHash32) issue for s390x. [#50365](https://github.com/ClickHouse/ClickHouse/pull/50365) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Add integration test check with the enabled analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) ([Dmitry Novik](https://github.com/novikd)). +* Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed several issues found by OSS-Fuzz. [#51736](https://github.com/ClickHouse/ClickHouse/pull/51736) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* There were a couple of failures because of (?) S3 availability. The sccache has a feature of failing over to local compilation. [#51893](https://github.com/ClickHouse/ClickHouse/pull/51893) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* 02242_delete_user_race and 02243_drop_user_grant_race tests have been corrected. [#51923](https://github.com/ClickHouse/ClickHouse/pull/51923) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. 
[#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow up [#50926](https://github.com/ClickHouse/ClickHouse/issues/50926). Add integration tests check with enabled analyzer to master. [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Improve the startup time of `clickhouse-client` and `clickhouse-local` in debug and sanitizer builds. This closes [#52228](https://github.com/ClickHouse/ClickHouse/issues/52228). [#52489](https://github.com/ClickHouse/ClickHouse/pull/52489) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix materialised pg syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). +* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). +* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). 
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). +* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). 
+* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add documentation for building in docker"'. [#51773](https://github.com/ClickHouse/ClickHouse/pull/51773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix build"'. [#51911](https://github.com/ClickHouse/ClickHouse/pull/51911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Millisecond and microsecond support in date_diff / age functions"'. [#52129](https://github.com/ClickHouse/ClickHouse/pull/52129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Re-add SipHash keyed functions"'. [#52466](https://github.com/ClickHouse/ClickHouse/pull/52466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add an ability to specify allocations size for sampling memory profiler"'. [#52496](https://github.com/ClickHouse/ClickHouse/pull/52496) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Rewrite uniq to count"'. 
[#52576](https://github.com/ClickHouse/ClickHouse/pull/52576) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove duplicate_order_by_and_distinct optimization [#47135](https://github.com/ClickHouse/ClickHouse/pull/47135) ([Igor Nikonov](https://github.com/devcrafter)). +* Update sort desc in ReadFromMergeTree after applying PREWHERE info [#48669](https://github.com/ClickHouse/ClickHouse/pull/48669) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `BindException: Address already in use` in HDFS integration tests [#49428](https://github.com/ClickHouse/ClickHouse/pull/49428) ([Nikita Taranov](https://github.com/nickitat)). +* Force libunwind usage (removes gcc_eh support) [#49438](https://github.com/ClickHouse/ClickHouse/pull/49438) ([Azat Khuzhin](https://github.com/azat)). +* Cleanup `storage_conf.xml` [#49557](https://github.com/ClickHouse/ClickHouse/pull/49557) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky tests caused by OPTIMIZE FINAL failing memory budget check [#49764](https://github.com/ClickHouse/ClickHouse/pull/49764) ([Michael Kolupaev](https://github.com/al13n321)). +* Remove unstable queries from performance/join_set_filter [#50235](https://github.com/ClickHouse/ClickHouse/pull/50235) ([vdimir](https://github.com/vdimir)). +* More accurate DNS resolve for the keeper connection [#50738](https://github.com/ClickHouse/ClickHouse/pull/50738) ([pufit](https://github.com/pufit)). +* Try to fix some trash in Disks and part moves [#51135](https://github.com/ClickHouse/ClickHouse/pull/51135) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add jemalloc support fro s390x [#51186](https://github.com/ClickHouse/ClickHouse/pull/51186) ([Boris Kuschel](https://github.com/bkuschel)). +* Resubmit [#48821](https://github.com/ClickHouse/ClickHouse/issues/48821) [#51208](https://github.com/ClickHouse/ClickHouse/pull/51208) ([Kseniia Sumarokova](https://github.com/kssenii)). +* test for [#36894](https://github.com/ClickHouse/ClickHouse/issues/36894) [#51274](https://github.com/ClickHouse/ClickHouse/pull/51274) ([Denny Crane](https://github.com/den-crane)). +* external_aggregation_fix for big endian machines [#51280](https://github.com/ClickHouse/ClickHouse/pull/51280) ([Sanjam Panda](https://github.com/saitama951)). +* Fix: Invalid number of rows in Chunk column Object [#51296](https://github.com/ClickHouse/ClickHouse/pull/51296) ([Igor Nikonov](https://github.com/devcrafter)). +* Add a test for [#44816](https://github.com/ClickHouse/ClickHouse/issues/44816) [#51305](https://github.com/ClickHouse/ClickHouse/pull/51305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for `calculate_text_stack_trace` setting [#51311](https://github.com/ClickHouse/ClickHouse/pull/51311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* decrease log level, make logs shorter [#51320](https://github.com/ClickHouse/ClickHouse/pull/51320) ([Sema Checherinda](https://github.com/CheSema)). +* Collect stack traces from job's scheduling and print along with exception's stack trace. [#51349](https://github.com/ClickHouse/ClickHouse/pull/51349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add a test for [#42691](https://github.com/ClickHouse/ClickHouse/issues/42691) [#51352](https://github.com/ClickHouse/ClickHouse/pull/51352) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add a test for [#32474](https://github.com/ClickHouse/ClickHouse/issues/32474) [#51354](https://github.com/ClickHouse/ClickHouse/pull/51354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#41727](https://github.com/ClickHouse/ClickHouse/issues/41727) [#51355](https://github.com/ClickHouse/ClickHouse/pull/51355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#35801](https://github.com/ClickHouse/ClickHouse/issues/35801) [#51356](https://github.com/ClickHouse/ClickHouse/pull/51356) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34626](https://github.com/ClickHouse/ClickHouse/issues/34626) [#51357](https://github.com/ClickHouse/ClickHouse/pull/51357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Initialize text_log earlier to capture table startup messages [#51360](https://github.com/ClickHouse/ClickHouse/pull/51360) ([Azat Khuzhin](https://github.com/azat)). +* Use separate default settings for clickhouse-local [#51363](https://github.com/ClickHouse/ClickHouse/pull/51363) ([Azat Khuzhin](https://github.com/azat)). +* Attempt to remove wrong code (catch/throw in Functions) [#51367](https://github.com/ClickHouse/ClickHouse/pull/51367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove suspicious code [#51383](https://github.com/ClickHouse/ClickHouse/pull/51383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable hedged requests under TSan [#51392](https://github.com/ClickHouse/ClickHouse/pull/51392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* no finalize in d-tor WriteBufferFromOStream [#51404](https://github.com/ClickHouse/ClickHouse/pull/51404) ([Sema Checherinda](https://github.com/CheSema)). +* Better diagnostics for 01193_metadata_loading [#51414](https://github.com/ClickHouse/ClickHouse/pull/51414) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix attaching gdb in stress tests [#51445](https://github.com/ClickHouse/ClickHouse/pull/51445) ([Kruglov Pavel](https://github.com/Avogar)). +* Merging [#36384](https://github.com/ClickHouse/ClickHouse/issues/36384) [#51458](https://github.com/ClickHouse/ClickHouse/pull/51458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible race on shutdown wait [#51497](https://github.com/ClickHouse/ClickHouse/pull/51497) ([Sergei Trifonov](https://github.com/serxa)). +* Fix `test_alter_moving_garbage`: lock between getActiveContainingPart and swapActivePart in parts mover [#51498](https://github.com/ClickHouse/ClickHouse/pull/51498) ([vdimir](https://github.com/vdimir)). +* Fix a logical error on mutation [#51502](https://github.com/ClickHouse/ClickHouse/pull/51502) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix running integration tests with spaces in it's names [#51514](https://github.com/ClickHouse/ClickHouse/pull/51514) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test 00417_kill_query [#51522](https://github.com/ClickHouse/ClickHouse/pull/51522) ([Nikolay Degterinsky](https://github.com/evillique)). +* fs cache: add some checks [#51536](https://github.com/ClickHouse/ClickHouse/pull/51536) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests [#51549](https://github.com/ClickHouse/ClickHouse/pull/51549) ([Nikita Taranov](https://github.com/nickitat)). 
+* 00900_orc_load: lift kill timeout [#51559](https://github.com/ClickHouse/ClickHouse/pull/51559) ([Robert Schulze](https://github.com/rschu1ze)). +* Add retries to 00416_pocopatch_progress_in_http_headers [#51575](https://github.com/ClickHouse/ClickHouse/pull/51575) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix merge_selecting_task scheduling [#51591](https://github.com/ClickHouse/ClickHouse/pull/51591) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add hex functions for cityhash [#51595](https://github.com/ClickHouse/ClickHouse/pull/51595) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove `unset CLICKHOUSE_LOG_COMMENT` from tests [#51623](https://github.com/ClickHouse/ClickHouse/pull/51623) ([Nikita Taranov](https://github.com/nickitat)). +* Implement endianness-independent serialization [#51637](https://github.com/ClickHouse/ClickHouse/pull/51637) ([ltrk2](https://github.com/ltrk2)). +* Ignore APPEND and TRUNCATE modifiers if file does not exist. [#51640](https://github.com/ClickHouse/ClickHouse/pull/51640) ([alekar](https://github.com/alekar)). +* Try to fix flaky 02210_processors_profile_log [#51641](https://github.com/ClickHouse/ClickHouse/pull/51641) ([Igor Nikonov](https://github.com/devcrafter)). +* Make common macros extendable [#51646](https://github.com/ClickHouse/ClickHouse/pull/51646) ([Amos Bird](https://github.com/amosbird)). +* Correct an exception message in src/Functions/nested.cpp [#51651](https://github.com/ClickHouse/ClickHouse/pull/51651) ([Alex Cheng](https://github.com/Alex-Cheng)). +* tests: fix 02050_client_profile_events flakiness [#51653](https://github.com/ClickHouse/ClickHouse/pull/51653) ([Azat Khuzhin](https://github.com/azat)). +* Minor follow-up to re2 update to 2023-06-02 ([#50949](https://github.com/ClickHouse/ClickHouse/issues/50949)) [#51655](https://github.com/ClickHouse/ClickHouse/pull/51655) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Update timeouts in tests for transactions [#51683](https://github.com/ClickHouse/ClickHouse/pull/51683) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove unused code [#51684](https://github.com/ClickHouse/ClickHouse/pull/51684) ([Sergei Trifonov](https://github.com/serxa)). +* Remove `mmap/mremap/munmap` from Allocator.h [#51686](https://github.com/ClickHouse/ClickHouse/pull/51686) ([alesapin](https://github.com/alesapin)). +* SonarCloud: Add C++23 Experimental Flag [#51687](https://github.com/ClickHouse/ClickHouse/pull/51687) ([Julio Jimenez](https://github.com/juliojimenez)). +* Wait with retries when attaching GDB in tests [#51688](https://github.com/ClickHouse/ClickHouse/pull/51688) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v23.6.1.1524-stable [#51691](https://github.com/ClickHouse/ClickHouse/pull/51691) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* fix write to finalized buffer [#51696](https://github.com/ClickHouse/ClickHouse/pull/51696) ([Sema Checherinda](https://github.com/CheSema)). 
+* do not log exception aborted for pending mutate/merge entries when shutdown [#51697](https://github.com/ClickHouse/ClickHouse/pull/51697) ([Sema Checherinda](https://github.com/CheSema)). +* Fix race in ContextAccess [#51704](https://github.com/ClickHouse/ClickHouse/pull/51704) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make test scripts backwards compatible [#51707](https://github.com/ClickHouse/ClickHouse/pull/51707) ([Antonio Andelic](https://github.com/antonio2368)). +* test for full join and null predicate [#51709](https://github.com/ClickHouse/ClickHouse/pull/51709) ([Denny Crane](https://github.com/den-crane)). +* A cmake warning on job limits underutilizing CPU [#51710](https://github.com/ClickHouse/ClickHouse/pull/51710) ([velavokr](https://github.com/velavokr)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Added ASK_PASSWORD client constant instead of hardcoded '\n' [#51723](https://github.com/ClickHouse/ClickHouse/pull/51723) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Update README.md [#51726](https://github.com/ClickHouse/ClickHouse/pull/51726) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove MemoryPool from Poco because it's useless [#51732](https://github.com/ClickHouse/ClickHouse/pull/51732) ([alesapin](https://github.com/alesapin)). +* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)). +* Update 01320_create_sync_race_condition_zookeeper.sh [#51742](https://github.com/ClickHouse/ClickHouse/pull/51742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Fix: Invalid number of rows in Chunk column Object" [#51750](https://github.com/ClickHouse/ClickHouse/pull/51750) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add SonarCloud to README [#51751](https://github.com/ClickHouse/ClickHouse/pull/51751) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test `02789_object_type_invalid_num_of_rows` [#51754](https://github.com/ClickHouse/ClickHouse/pull/51754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix (benign) data race in `transform` [#51755](https://github.com/ClickHouse/ClickHouse/pull/51755) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky KeeperMap test [#51764](https://github.com/ClickHouse/ClickHouse/pull/51764) ([Antonio Andelic](https://github.com/antonio2368)). +* Version mypy=1.4.1 falsly reports unused ignore comment [#51769](https://github.com/ClickHouse/ClickHouse/pull/51769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid keeping lock Context::getLock() while calculating access rights [#51772](https://github.com/ClickHouse/ClickHouse/pull/51772) ([Vitaly Baranov](https://github.com/vitlibar)). +* Making stateless tests with timeout less flaky [#51774](https://github.com/ClickHouse/ClickHouse/pull/51774) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix after [#51000](https://github.com/ClickHouse/ClickHouse/issues/51000) [#51790](https://github.com/ClickHouse/ClickHouse/pull/51790) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Add assert in ThreadStatus destructor for correct current_thread [#51800](https://github.com/ClickHouse/ClickHouse/pull/51800) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix broken parts handling in `ReplicatedMergeTree` [#51801](https://github.com/ClickHouse/ClickHouse/pull/51801) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix tsan signal-unsafe call [#51802](https://github.com/ClickHouse/ClickHouse/pull/51802) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix for parallel replicas not completely disabled by granule count threshold [#51805](https://github.com/ClickHouse/ClickHouse/pull/51805) ([Alexander Gololobov](https://github.com/davenger)). +* Make sure that we don't attempt to serialize/deserialize block with 0 columns and non-zero rows [#51807](https://github.com/ClickHouse/ClickHouse/pull/51807) ([Alexander Gololobov](https://github.com/davenger)). +* Fix rare bug in `DROP COLUMN` and enabled sparse columns [#51809](https://github.com/ClickHouse/ClickHouse/pull/51809) ([Anton Popov](https://github.com/CurtizJ)). +* Fix flaky `test_multiple_disks` [#51821](https://github.com/ClickHouse/ClickHouse/pull/51821) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow up to [#51547](https://github.com/ClickHouse/ClickHouse/issues/51547) [#51822](https://github.com/ClickHouse/ClickHouse/pull/51822) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly grep archives in stress tests [#51824](https://github.com/ClickHouse/ClickHouse/pull/51824) ([Antonio Andelic](https://github.com/antonio2368)). +* Update analyzer_tech_debt.txt [#51836](https://github.com/ClickHouse/ClickHouse/pull/51836) ([Alexander Tokmakov](https://github.com/tavplubix)). +* remove unused code [#51837](https://github.com/ClickHouse/ClickHouse/pull/51837) ([flynn](https://github.com/ucasfl)). +* Fix disk config for upgrade tests [#51839](https://github.com/ClickHouse/ClickHouse/pull/51839) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove Coverity from workflows, but leave in the code [#51842](https://github.com/ClickHouse/ClickHouse/pull/51842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Many fixes [3] [#51848](https://github.com/ClickHouse/ClickHouse/pull/51848) ([Ilya Yatsishin](https://github.com/qoega)). +* Change misleading name in joins: addJoinedBlock -> addBlockToJoin [#51852](https://github.com/ClickHouse/ClickHouse/pull/51852) ([Igor Nikonov](https://github.com/devcrafter)). +* fix: correct exception messages on policies comparison [#51854](https://github.com/ClickHouse/ClickHouse/pull/51854) ([Feng Kaiyu](https://github.com/fky2015)). +* Update 02439_merge_selecting_partitions.sql [#51862](https://github.com/ClickHouse/ClickHouse/pull/51862) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove useless packages [#51863](https://github.com/ClickHouse/ClickHouse/pull/51863) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless logs [#51865](https://github.com/ClickHouse/ClickHouse/pull/51865) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect log level = warning [#51867](https://github.com/ClickHouse/ClickHouse/pull/51867) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_replicated_table_attach [#51868](https://github.com/ClickHouse/ClickHouse/pull/51868) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Better usability of a test [#51869](https://github.com/ClickHouse/ClickHouse/pull/51869) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless code [#51873](https://github.com/ClickHouse/ClickHouse/pull/51873) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix upgrade check script [#51878](https://github.com/ClickHouse/ClickHouse/pull/51878) ([Antonio Andelic](https://github.com/antonio2368)). +* Sqlloogic improvements [#51883](https://github.com/ClickHouse/ClickHouse/pull/51883) ([Ilya Yatsishin](https://github.com/qoega)). +* Disable ThinLTO on non-Linux [#51897](https://github.com/ClickHouse/ClickHouse/pull/51897) ([Robert Schulze](https://github.com/rschu1ze)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix build [#51909](https://github.com/ClickHouse/ClickHouse/pull/51909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build [#51910](https://github.com/ClickHouse/ClickHouse/pull/51910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `00175_partition_by_ignore` and move it to correct location [#51913](https://github.com/ClickHouse/ClickHouse/pull/51913) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool [#51914](https://github.com/ClickHouse/ClickHouse/pull/51914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe better tests [#51916](https://github.com/ClickHouse/ClickHouse/pull/51916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert system drop filesystem cache by key [#51917](https://github.com/ClickHouse/ClickHouse/pull/51917) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `detach_attach_partition_race` [#51920](https://github.com/ClickHouse/ClickHouse/pull/51920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix for `02481_async_insert_race_long` [#51925](https://github.com/ClickHouse/ClickHouse/pull/51925) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault caused by `ThreadStatus` [#51931](https://github.com/ClickHouse/ClickHouse/pull/51931) ([Antonio Andelic](https://github.com/antonio2368)). +* Print short fault info only from safe fields [#51932](https://github.com/ClickHouse/ClickHouse/pull/51932) ([Alexander Gololobov](https://github.com/davenger)). +* Fix typo in integration tests [#51944](https://github.com/ClickHouse/ClickHouse/pull/51944) ([Ilya Yatsishin](https://github.com/qoega)). +* Better logs on shutdown [#51951](https://github.com/ClickHouse/ClickHouse/pull/51951) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Filter databases list before querying potentially slow fields [#51955](https://github.com/ClickHouse/ClickHouse/pull/51955) ([Alexander Gololobov](https://github.com/davenger)). +* Fix some issues with transactions [#51959](https://github.com/ClickHouse/ClickHouse/pull/51959) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix unrelated messages from LSan in clickhouse-client [#51966](https://github.com/ClickHouse/ClickHouse/pull/51966) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow OOM in AST Fuzzer with Sanitizers [#51967](https://github.com/ClickHouse/ClickHouse/pull/51967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Disable one test under Analyzer [#51968](https://github.com/ClickHouse/ClickHouse/pull/51968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Docker [#51969](https://github.com/ClickHouse/ClickHouse/pull/51969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01825_type_json_from_map` [#51970](https://github.com/ClickHouse/ClickHouse/pull/51970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02354_distributed_with_external_aggregation_memory_usage` [#51971](https://github.com/ClickHouse/ClickHouse/pull/51971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix disaster in integration tests, part 2 [#51973](https://github.com/ClickHouse/ClickHouse/pull/51973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Cleanup remote_servers in dist config.xml [#51985](https://github.com/ClickHouse/ClickHouse/pull/51985) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.6.2.18-stable [#51986](https://github.com/ClickHouse/ClickHouse/pull/51986) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.20.11-lts [#51987](https://github.com/ClickHouse/ClickHouse/pull/51987) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix performance test for regexp cache [#51988](https://github.com/ClickHouse/ClickHouse/pull/51988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Move a test to the right place [#51989](https://github.com/ClickHouse/ClickHouse/pull/51989) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a check to validate that the stateful tests are stateful [#51990](https://github.com/ClickHouse/ClickHouse/pull/51990) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that functional tests cleanup their tables [#51991](https://github.com/ClickHouse/ClickHouse/pull/51991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_extreme_deduplication [#51992](https://github.com/ClickHouse/ClickHouse/pull/51992) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup SymbolIndex after reload got removed [#51993](https://github.com/ClickHouse/ClickHouse/pull/51993) ([Azat Khuzhin](https://github.com/azat)). +* Update CompletedPipelineExecutor exception log name [#52028](https://github.com/ClickHouse/ClickHouse/pull/52028) ([xiao](https://github.com/nicelulu)). +* Fix `00502_custom_partitioning_replicated_zookeeper_long` [#52032](https://github.com/ClickHouse/ClickHouse/pull/52032) ([Antonio Andelic](https://github.com/antonio2368)). +* Prohibit send_metadata for s3_plain disks [#52038](https://github.com/ClickHouse/ClickHouse/pull/52038) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.4.6.25-stable [#52061](https://github.com/ClickHouse/ClickHouse/pull/52061) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Preparations for Trivial Support For Resharding (part1) [#52068](https://github.com/ClickHouse/ClickHouse/pull/52068) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.3.8.21-lts [#52077](https://github.com/ClickHouse/ClickHouse/pull/52077) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flakiness of test_keeper_s3_snapshot flakiness [#52083](https://github.com/ClickHouse/ClickHouse/pull/52083) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix test_extreme_deduplication flakiness [#52085](https://github.com/ClickHouse/ClickHouse/pull/52085) ([Azat Khuzhin](https://github.com/azat)). +* Small docs update for toYearWeek() function [#52090](https://github.com/ClickHouse/ClickHouse/pull/52090) ([Andrey Zvonov](https://github.com/zvonand)). +* Small docs update for DateTime, DateTime64 [#52094](https://github.com/ClickHouse/ClickHouse/pull/52094) ([Andrey Zvonov](https://github.com/zvonand)). +* Add missing --force for docker network prune (otherwise it is noop on CI) [#52095](https://github.com/ClickHouse/ClickHouse/pull/52095) ([Azat Khuzhin](https://github.com/azat)). +* tests: drop existing view in test_materialized_mysql_database [#52103](https://github.com/ClickHouse/ClickHouse/pull/52103) ([Azat Khuzhin](https://github.com/azat)). +* Update README.md [#52115](https://github.com/ClickHouse/ClickHouse/pull/52115) ([Tyler Hannan](https://github.com/tylerhannan)). +* Print Zxid in keeper stat command in hex (so as ZooKeeper) [#52122](https://github.com/ClickHouse/ClickHouse/pull/52122) ([Azat Khuzhin](https://github.com/azat)). +* Skip protection from double decompression if inode from maps cannot be obtained [#52138](https://github.com/ClickHouse/ClickHouse/pull/52138) ([Azat Khuzhin](https://github.com/azat)). +* There is no point in detecting flaky tests [#52142](https://github.com/ClickHouse/ClickHouse/pull/52142) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove default argument value [#52143](https://github.com/ClickHouse/ClickHouse/pull/52143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the "kill_mutation" test [#52144](https://github.com/ClickHouse/ClickHouse/pull/52144) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ORDER BY tuple of WINDOW functions (and slightly more changes) [#52146](https://github.com/ClickHouse/ClickHouse/pull/52146) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible EADDRINUSE ("Address already in use") in integration tests [#52148](https://github.com/ClickHouse/ClickHouse/pull/52148) ([Azat Khuzhin](https://github.com/azat)). +* Fix test 02497_storage_file_reader_selection [#52154](https://github.com/ClickHouse/ClickHouse/pull/52154) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected AST Set [#52158](https://github.com/ClickHouse/ClickHouse/pull/52158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix slow test `02317_distinct_in_order_optimization` [#52173](https://github.com/ClickHouse/ClickHouse/pull/52173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add comments for https://github.com/ClickHouse/ClickHouse/pull/52112 [#52175](https://github.com/ClickHouse/ClickHouse/pull/52175) ([李扬](https://github.com/taiyang-li)). +* Randomize timezone in tests across non-deterministic around 1970 and default [#52184](https://github.com/ClickHouse/ClickHouse/pull/52184) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_multiple_disks/test.py::test_start_stop_moves` [#52189](https://github.com/ClickHouse/ClickHouse/pull/52189) ([Antonio Andelic](https://github.com/antonio2368)). +* CMake: Simplify job limiting [#52196](https://github.com/ClickHouse/ClickHouse/pull/52196) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Fix self extracting binaries under qemu linux-user (qemu-$ARCH-static) [#52198](https://github.com/ClickHouse/ClickHouse/pull/52198) ([Azat Khuzhin](https://github.com/azat)). +* Fix `Integration tests flaky check (asan)` [#52201](https://github.com/ClickHouse/ClickHouse/pull/52201) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test test_lost_part [#52202](https://github.com/ClickHouse/ClickHouse/pull/52202) ([alesapin](https://github.com/alesapin)). +* MaterializedMySQL: Replace to_string by magic_enum::enum_name [#52204](https://github.com/ClickHouse/ClickHouse/pull/52204) ([Val Doroshchuk](https://github.com/valbok)). +* MaterializedMySQL: Add tests to parse db and table names from DDL [#52208](https://github.com/ClickHouse/ClickHouse/pull/52208) ([Val Doroshchuk](https://github.com/valbok)). +* Revert "Fixed several issues found by OSS-Fuzz" [#52216](https://github.com/ClickHouse/ClickHouse/pull/52216) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use one copy replication more aggressively [#52218](https://github.com/ClickHouse/ClickHouse/pull/52218) ([alesapin](https://github.com/alesapin)). +* Fix flaky test `01076_parallel_alter_replicated_zookeeper` [#52221](https://github.com/ClickHouse/ClickHouse/pull/52221) ([alesapin](https://github.com/alesapin)). +* Fix 01889_key_condition_function_chains for analyzer. [#52223](https://github.com/ClickHouse/ClickHouse/pull/52223) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Inhibit settings randomization in the test `json_ghdata` [#52226](https://github.com/ClickHouse/ClickHouse/pull/52226) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better diagnostics in a test [#52227](https://github.com/ClickHouse/ClickHouse/pull/52227) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable no-upgrade-check for 02273_full_sort_join [#52235](https://github.com/ClickHouse/ClickHouse/pull/52235) ([vdimir](https://github.com/vdimir)). +* Fix network manager for integration tests [#52237](https://github.com/ClickHouse/ClickHouse/pull/52237) ([Azat Khuzhin](https://github.com/azat)). +* List replication queue only for current test database [#52238](https://github.com/ClickHouse/ClickHouse/pull/52238) ([Alexander Gololobov](https://github.com/davenger)). +* Attempt to fix assert in tsan with fibers [#52241](https://github.com/ClickHouse/ClickHouse/pull/52241) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix undefined behaviour in fuzzer [#52256](https://github.com/ClickHouse/ClickHouse/pull/52256) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow-up to [#51959](https://github.com/ClickHouse/ClickHouse/issues/51959) [#52261](https://github.com/ClickHouse/ClickHouse/pull/52261) ([Alexander Tokmakov](https://github.com/tavplubix)). +* More fair queue for `drop table sync` [#52276](https://github.com/ClickHouse/ClickHouse/pull/52276) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `02497_trace_events_stress_long` [#52279](https://github.com/ClickHouse/ClickHouse/pull/52279) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix test `01111_create_drop_replicated_db_stress` [#52283](https://github.com/ClickHouse/ClickHouse/pull/52283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ugly code [#52284](https://github.com/ClickHouse/ClickHouse/pull/52284) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add missing replica syncs in test_backup_restore_on_cluster [#52306](https://github.com/ClickHouse/ClickHouse/pull/52306) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix test_replicated_database 'node doesn't exist' flakiness [#52307](https://github.com/ClickHouse/ClickHouse/pull/52307) ([Michael Kolupaev](https://github.com/al13n321)). +* Minor: Update description of events "QueryCacheHits/Misses" [#52309](https://github.com/ClickHouse/ClickHouse/pull/52309) ([Robert Schulze](https://github.com/rschu1ze)). +* Beautify pretty-printing of the query string in SYSTEM.QUERY_CACHE [#52312](https://github.com/ClickHouse/ClickHouse/pull/52312) ([Robert Schulze](https://github.com/rschu1ze)). +* Reduce dependencies for skim by avoid using default features [#52316](https://github.com/ClickHouse/ClickHouse/pull/52316) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02725_memory-for-merges [#52317](https://github.com/ClickHouse/ClickHouse/pull/52317) ([alesapin](https://github.com/alesapin)). +* Skip unsupported disks in Keeper [#52321](https://github.com/ClickHouse/ClickHouse/pull/52321) ([Antonio Andelic](https://github.com/antonio2368)). +* Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed" [#52322](https://github.com/ClickHouse/ClickHouse/pull/52322) ([Kruglov Pavel](https://github.com/Avogar)). +* Resubmit [#51716](https://github.com/ClickHouse/ClickHouse/issues/51716) [#52323](https://github.com/ClickHouse/ClickHouse/pull/52323) ([Kruglov Pavel](https://github.com/Avogar)). +* Add logging about all found workflows for merge_pr.py [#52324](https://github.com/ClickHouse/ClickHouse/pull/52324) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Minor: Less awkward IAST::FormatSettings [#52332](https://github.com/ClickHouse/ClickHouse/pull/52332) ([Robert Schulze](https://github.com/rschu1ze)). +* Mark test 02125_many_mutations_2 as no-parallel to avoid flakiness [#52338](https://github.com/ClickHouse/ClickHouse/pull/52338) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix capabilities installed via systemd service (fixes netlink/IO priorities) [#52357](https://github.com/ClickHouse/ClickHouse/pull/52357) ([Azat Khuzhin](https://github.com/azat)). +* Update 01606_git_import.sh [#52360](https://github.com/ClickHouse/ClickHouse/pull/52360) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update ci-slack-bot.py [#52372](https://github.com/ClickHouse/ClickHouse/pull/52372) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `test_keeper_session` [#52373](https://github.com/ClickHouse/ClickHouse/pull/52373) ([Antonio Andelic](https://github.com/antonio2368)). +* Update ci-slack-bot.py [#52374](https://github.com/ClickHouse/ClickHouse/pull/52374) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable analyzer setting in backward_compatibility integration tests. [#52375](https://github.com/ClickHouse/ClickHouse/pull/52375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* New metric - Filesystem cache size limit [#52378](https://github.com/ClickHouse/ClickHouse/pull/52378) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Fix `test_replicated_merge_tree_encrypted_disk ` [#52379](https://github.com/ClickHouse/ClickHouse/pull/52379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `02122_parallel_formatting_XML ` [#52380](https://github.com/ClickHouse/ClickHouse/pull/52380) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Follow up to [#49698](https://github.com/ClickHouse/ClickHouse/issues/49698) [#52381](https://github.com/ClickHouse/ClickHouse/pull/52381) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Rename TaskStatsInfoGetter into NetlinkMetricsProvider [#52392](https://github.com/ClickHouse/ClickHouse/pull/52392) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_keeper_force_recovery` [#52408](https://github.com/ClickHouse/ClickHouse/pull/52408) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky gtest_lru_file_cache.cpp [#52418](https://github.com/ClickHouse/ClickHouse/pull/52418) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix: remove redundant distinct with views [#52438](https://github.com/ClickHouse/ClickHouse/pull/52438) ([Igor Nikonov](https://github.com/devcrafter)). +* Add 02815_range_dict_no_direct_join to analyzer_tech_debt.txt [#52464](https://github.com/ClickHouse/ClickHouse/pull/52464) ([vdimir](https://github.com/vdimir)). +* do not throw exception in OptimizedRegularExpressionImpl::analyze [#52467](https://github.com/ClickHouse/ClickHouse/pull/52467) ([Han Fei](https://github.com/hanfei1991)). +* Remove skip_startup_tables from IDatabase::loadStoredObjects() [#52491](https://github.com/ClickHouse/ClickHouse/pull/52491) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_insert_same_partition_and_merge by increasing wait time [#52497](https://github.com/ClickHouse/ClickHouse/pull/52497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Try to fix asan warning in HashJoin [#52499](https://github.com/ClickHouse/ClickHouse/pull/52499) ([Igor Nikonov](https://github.com/devcrafter)). +* Replace with three way comparison [#52509](https://github.com/ClickHouse/ClickHouse/pull/52509) ([flynn](https://github.com/ucasfl)). +* Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop [#52514](https://github.com/ClickHouse/ClickHouse/pull/52514) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_throttling` [#52515](https://github.com/ClickHouse/ClickHouse/pull/52515) ([Antonio Andelic](https://github.com/antonio2368)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `toDecimalString` function [#52520](https://github.com/ClickHouse/ClickHouse/pull/52520) ([Andrey Zvonov](https://github.com/zvonand)). +* Remove unused code [#52527](https://github.com/ClickHouse/ClickHouse/pull/52527) ([Raúl Marín](https://github.com/Algunenano)). +* Cancel execution in PipelineExecutor in case of exception in graph->updateNode [#52533](https://github.com/ClickHouse/ClickHouse/pull/52533) ([Kruglov Pavel](https://github.com/Avogar)). +* Make 01951_distributed_push_down_limit analyzer agnostic [#52534](https://github.com/ClickHouse/ClickHouse/pull/52534) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix disallow_concurrency test for backup and restore [#52536](https://github.com/ClickHouse/ClickHouse/pull/52536) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update 02136_scalar_subquery_metrics.sql [#52537](https://github.com/ClickHouse/ClickHouse/pull/52537) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* tests: fix 01035_avg_weighted_long flakiness [#52556](https://github.com/ClickHouse/ClickHouse/pull/52556) ([Azat Khuzhin](https://github.com/azat)). +* tests: increase throttling for 01923_network_receive_time_metric_insert [#52557](https://github.com/ClickHouse/ClickHouse/pull/52557) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 00719_parallel_ddl_table flakiness in debug builds [#52558](https://github.com/ClickHouse/ClickHouse/pull/52558) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 01821_join_table_race_long flakiness [#52559](https://github.com/ClickHouse/ClickHouse/pull/52559) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky `00995_exception_while_insert` [#52568](https://github.com/ClickHouse/ClickHouse/pull/52568) ([Antonio Andelic](https://github.com/antonio2368)). +* MaterializedMySQL: Fix typos in tests [#52575](https://github.com/ClickHouse/ClickHouse/pull/52575) ([Val Doroshchuk](https://github.com/valbok)). +* Fix `02497_trace_events_stress_long` again [#52587](https://github.com/ClickHouse/ClickHouse/pull/52587) ([Antonio Andelic](https://github.com/antonio2368)). +* Revert "Remove `mmap/mremap/munmap` from Allocator.h" [#52589](https://github.com/ClickHouse/ClickHouse/pull/52589) ([Nikita Taranov](https://github.com/nickitat)). +* Remove peak memory usage from the final message in the client [#52598](https://github.com/ClickHouse/ClickHouse/pull/52598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* GinIndexStore: fix a bug where files are finalized after the first write [#52602](https://github.com/ClickHouse/ClickHouse/pull/52602) ([Sema Checherinda](https://github.com/CheSema)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix build with clang-15 [#52627](https://github.com/ClickHouse/ClickHouse/pull/52627) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix style [#52647](https://github.com/ClickHouse/ClickHouse/pull/52647) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix logging level of a noisy message [#52648](https://github.com/ClickHouse/ClickHouse/pull/52648) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Added field `refcount` to `system.remote_data_paths` table" [#52657](https://github.com/ClickHouse/ClickHouse/pull/52657) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8b535e3d897..1eabc65a10f 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.7.1.2470-stable 2023-07-27 v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 From ce38d3c5ea45507696430e9c7f39f9ab7b9de394 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:11:08 +0200 Subject: [PATCH 081/151] address comment --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 53481ab06a0..dbb4f7f0d8e 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,11 +10,17 @@ #include #include +#include "Common/Exception.h" #include namespace DB { +namespace ErrorCodes +{ + extern const int ABORTED; +} + namespace { @@ -271,8 +277,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf if (stopped) { - LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we drop this part."); - return; + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); } chassert(current_writer != nullptr); From f5dfb70f5c2f4b94a54e9fdb97737a70b28362ad Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:12:20 +0200 Subject: [PATCH 082/151] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index dbb4f7f0d8e..80e94b2fd39 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,7 +10,7 @@ #include #include -#include "Common/Exception.h" +#include #include namespace DB From b5160c8072b9660e7c62b8305f56eda26313ecb0 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 27 Jul 2023 14:16:19 +0000 Subject: [PATCH 083/151] Fix bugs and better test for SYSTEM STOP LISTEN --- programs/server/Server.cpp | 25 ++-- src/Server/ServerType.cpp | 20 +-- src/Server/ServerType.h | 1 - .../configs/cluster.xml | 4 +- .../configs/protocols.xml | 23 ++++ .../test_system_start_stop_listen/test.py | 128 ++++++++++++++++-- 6 files changed, 157 insertions(+), 44 deletions(-) create mode 100644 tests/integration/test_system_start_stop_listen/configs/protocols.xml diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index dce52ecdb12..adec13a5199 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2031,27 +2031,26 @@ void Server::createServers( for (const auto & protocol : protocols) { - if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + std::string prefix = "protocols." + protocol + "."; + std::string port_name = prefix + "port"; + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name)) continue; std::vector hosts; - if (config.has("protocols." + protocol + ".host")) - hosts.push_back(config.getString("protocols." 
+ protocol + ".host")); + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); else hosts = listen_hosts; for (const auto & host : hosts) { - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - - if (!config.has(prefix + "port")) - continue; - - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; bool is_secure = false; auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp index c6916ee39d9..4952cd1bd24 100644 --- a/src/Server/ServerType.cpp +++ b/src/Server/ServerType.cpp @@ -40,7 +40,7 @@ const char * ServerType::serverTypeToString(ServerType::Type type) return type_name.data(); } -bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) const +bool ServerType::shouldStart(Type server_type, const std::string & server_custom_name) const { if (type == Type::QUERIES_ALL) return true; @@ -77,13 +77,15 @@ bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) } } - return type == server_type && custom_name == custom_name_; + if (type == Type::CUSTOM) + return server_type == type && server_custom_name == "protocols." + custom_name + ".port"; + + return server_type == type; } bool ServerType::shouldStop(const std::string & port_name) const { Type port_type; - std::string port_custom_name; if (port_name == "http_port") port_type = Type::HTTP; @@ -119,20 +121,12 @@ bool ServerType::shouldStop(const std::string & port_name) const port_type = Type::INTERSERVER_HTTPS; else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) - { - constexpr size_t protocols_size = std::string_view("protocols.").size(); - constexpr size_t port_size = std::string_view("protocols.").size(); - port_type = Type::CUSTOM; - port_custom_name = port_name.substr(protocols_size, port_name.size() - port_size); - } - else - port_type = Type::UNKNOWN; - if (port_type == Type::UNKNOWN) + else return false; - return shouldStart(type, port_custom_name); + return shouldStart(port_type, port_name); } } diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h index 345d1a10119..8fb81622ab3 100644 --- a/src/Server/ServerType.h +++ b/src/Server/ServerType.h @@ -10,7 +10,6 @@ public: enum Type { - UNKNOWN, TCP, TCP_WITH_PROXY, TCP_SECURE, diff --git a/tests/integration/test_system_start_stop_listen/configs/cluster.xml b/tests/integration/test_system_start_stop_listen/configs/cluster.xml index 93d8f890f40..34b6c32c6d0 100644 --- a/tests/integration/test_system_start_stop_listen/configs/cluster.xml +++ b/tests/integration/test_system_start_stop_listen/configs/cluster.xml @@ -3,11 +3,11 @@ - node1 + main_node 9000 - node2 + backup_node 9000 diff --git a/tests/integration/test_system_start_stop_listen/configs/protocols.xml b/tests/integration/test_system_start_stop_listen/configs/protocols.xml new file mode 100644 index 00000000000..1d8608bcaca --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/protocols.xml @@ -0,0 +1,23 @@ + + 0.0.0.0 + + + 9000 + 8123 + 9004 + + + + + tcp + 0.0.0.0 + 9001 + native protocol (tcp) + + + http + 8124 + http protocol + + + diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py index ec1a000c599..0db313368fd 100644 --- 
a/tests/integration/test_system_start_stop_listen/test.py +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -2,20 +2,16 @@ import pytest -import time from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager -from helpers.test_tools import assert_eq_with_retry -import random -import string -import json +from helpers.client import Client +import requests cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["configs/cluster.xml"], with_zookeeper=True +main_node = cluster.add_instance( + "main_node", main_configs=["configs/cluster.xml", "configs/protocols.xml"], with_zookeeper=True ) -node2 = cluster.add_instance( - "node2", main_configs=["configs/cluster.xml"], with_zookeeper=True +backup_node = cluster.add_instance( + "backup_node", main_configs=["configs/cluster.xml"], with_zookeeper=True ) @@ -30,11 +26,113 @@ def started_cluster(): cluster.shutdown() -def test_system_start_stop_listen_queries(started_cluster): - node1.query("SYSTEM STOP LISTEN QUERIES ALL") +def http_works(port=8123): + try: + response = requests.post(f"http://{main_node.ip_address}:{port}/ping") + if response.status_code == 400: + return True + except: + pass - assert "Connection refused" in node1.query_and_get_error("SELECT 1", timeout=3) + return False - node2.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") +def assert_everything_works(): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() + assert http_works(8124) - node1.query("SELECT 1") +QUERY = "SELECT 1" +MYSQL_QUERY = "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', 'default', '', SETTINGS connect_timeout = 100, connection_wait_timeout = 100)" + +def test_default_protocols(started_cluster): + # TCP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN TCP") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default TCP") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN HTTP") + assert http_works() == False + main_node.query("SYSTEM START LISTEN HTTP") + + # MySQL + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN MYSQL") + assert "Connections to mysql failed" in main_node.query_and_get_error(MYSQL_QUERY) + main_node.query("SYSTEM START LISTEN MYSQL") + + assert_everything_works() + +def test_custom_protocols(started_cluster): + # TCP + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'tcp'") + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + main_node.query("SYSTEM START LISTEN CUSTOM 'tcp'") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'http'") + assert http_works(8124) == False + main_node.query("SYSTEM START LISTEN CUSTOM 'http'") + + assert_everything_works() + +def test_all_protocols(started_cluster): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + + # STOP LISTEN QUERIES ALL + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() == False + assert http_works(8124) == False + 
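+    # At this point no protocol should accept connections, so listening is
+    # restarted through the second replica: the first one can no longer
+    # serve queries itself.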
backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + # STOP LISTEN QUERIES DEFAULT + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES DEFAULT") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + + # STOP LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + + # Disable all protocols, check first START LISTEN QUERIES DEFAULT then START LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() From ee9bad7a3140cc3164cbf36ef45486068b4be0e7 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 27 Jul 2023 14:18:46 +0000 Subject: [PATCH 084/151] Fix style --- tests/integration/test_system_start_stop_listen/test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py index 0db313368fd..1925685af03 100644 --- a/tests/integration/test_system_start_stop_listen/test.py +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -8,7 +8,9 @@ import requests cluster = ClickHouseCluster(__file__) main_node = cluster.add_instance( - "main_node", main_configs=["configs/cluster.xml", "configs/protocols.xml"], with_zookeeper=True + "main_node", + main_configs=["configs/cluster.xml", "configs/protocols.xml"], + with_zookeeper=True, ) backup_node = cluster.add_instance( "backup_node", main_configs=["configs/cluster.xml"], with_zookeeper=True @@ -36,6 +38,7 @@ def http_works(port=8123): return False + def assert_everything_works(): custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) main_node.query(QUERY) @@ -44,9 +47,11 @@ def assert_everything_works(): assert http_works() assert http_works(8124) + QUERY = "SELECT 1" MYSQL_QUERY = "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', 'default', '', SETTINGS connect_timeout = 100, connection_wait_timeout = 100)" + def test_default_protocols(started_cluster): # TCP assert_everything_works() @@ -68,6 +73,7 @@ def test_default_protocols(started_cluster): assert_everything_works() + def test_custom_protocols(started_cluster): # TCP custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) @@ -84,6 +90,7 @@ def test_custom_protocols(started_cluster): assert_everything_works() + def test_all_protocols(started_cluster): custom_client = Client(main_node.ip_address, 9001, 
command=cluster.client_bin_path) assert_everything_works() From 0d44d527ef590a5471ea577c132edb42f0c99c70 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:27:04 +0200 Subject: [PATCH 085/151] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 80e94b2fd39..25b93160d27 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -241,8 +241,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: if (stopped) { - LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we add this part."); - return {}; + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); } chassert(current_writer != nullptr); From f6ca013c536d76ca6c1403db5f84d792f6bd8864 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:28:00 +0200 Subject: [PATCH 086/151] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 25b93160d27..22dabc43a8c 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -11,7 +11,6 @@ #include #include -#include namespace DB { From 9488567bf6be7e2b751917a179222478fcb46f5e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:28:08 +0200 Subject: [PATCH 087/151] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 22dabc43a8c..548b61ce422 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -240,7 +240,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: if (stopped) { - throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we add this part."); } chassert(current_writer != nullptr); From 5611b2fff484d74c70c8ad6b62ba8d66c0b63589 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 27 Jul 2023 15:45:55 +0200 Subject: [PATCH 088/151] Add a note about not working _table filter for Merge with analyzer Signed-off-by: Azat Khuzhin --- src/Storages/StorageMerge.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b0ed242d14d..272f35303bd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -795,6 +795,10 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( bool filter_by_database_virtual_column /* = false */, bool filter_by_table_virtual_column /* = false */) const { + /// FIXME: filtering does not work with allow_experimental_analyzer due to + /// different column names there (it has "table_name._table" not just + /// "_table") + assert(!filter_by_database_virtual_column || !filter_by_table_virtual_column || query); 
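+    /// The assert documents the contract: callers that ask to filter by both
+    /// virtual columns must also pass a query to filter with.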
const Settings & settings = query_context->getSettingsRef(); From 68aed0d16e331a6ba6b592243f10ce2a816152db Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 26 Jul 2023 20:25:48 +0200 Subject: [PATCH 089/151] RFC: Fix filtering by virtual columns with OR expression Virtual columns did not supports queries with OR, for example query like this (here `m` is the `Merge` table, see the test): select key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1'); Will always leads to: Cannot find column `value` in source stream, there are only columns ... The reason for this is that it actually executes the following queries: SELECT key, value FROM default.d1 WHERE ((value = 10) AND ('v1' = 'v1')) OR ((value = 20) AND ('v1' = 'v1')); SELECT key FROM default.d2 WHERE 0; And this kind of filtering is used not only for `Merge` table but also: - `_table` for `Merge` (already mentioned) - `_file` for `File` - `_idx` for `S3` - and as well as filtering `system.*` tables by `database`/`table`/... Signed-off-by: Azat Khuzhin --- src/Storages/VirtualColumnUtils.cpp | 36 +++++++++++++----- .../02840_merge__table_or_filter.reference | 38 +++++++++++++++++++ .../02840_merge__table_or_filter.sql.j2 | 34 +++++++++++++++++ 3 files changed, 99 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02840_merge__table_or_filter.reference create mode 100644 tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 907fc0cd22c..79be1f98a0f 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -63,14 +64,31 @@ bool isValidFunction(const ASTPtr & expression, const std::function & is_constant, ASTs & result) { const auto * function = expression->as(); - if (function && (function->name == "and" || function->name == "indexHint")) + + if (function) { - bool ret = true; - for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, result); - return ret; + if (function->name == "and" || function->name == "indexHint") + { + bool ret = true; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, result); + return ret; + } + else if (function->name == "or") + { + bool ret = true; + ASTs or_args; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, or_args); + /// We can keep condition only if it still OR condition (i.e. we + /// have dependent conditions for columns at both sides) + if (or_args.size() == 2) + result.push_back(makeASTForLogicalOr(std::move(or_args))); + return ret; + } } - else if (isValidFunction(expression, is_constant)) + + if (isValidFunction(expression, is_constant)) { result.push_back(expression->clone()); return true; @@ -80,13 +98,13 @@ bool extractFunctions(const ASTPtr & expression, const std::function Date: Thu, 27 Jul 2023 09:49:34 +0000 Subject: [PATCH 090/151] Add query cache metrics to system.asynchronous_metrics Cf. 
https://github.com/ClickHouse/ClickHouse/pull/52384#issuecomment-1653241216 --- docs/en/operations/query-cache.md | 11 ++++++----- .../operations/system-tables/asynchronous_metrics.md | 12 ++++++++++++ docs/en/operations/system-tables/events.md | 2 ++ docs/en/operations/system-tables/metrics.md | 2 +- src/Interpreters/Cache/QueryCache.cpp | 10 ++++++++++ src/Interpreters/Cache/QueryCache.h | 5 ++++- src/Interpreters/ServerAsynchronousMetrics.cpp | 6 ++++++ 7 files changed, 41 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 547105c65cc..d0b785d8fda 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -61,11 +61,12 @@ use_query_cache = true`) but one should keep in mind that all `SELECT` queries i may return cached results then. The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table -`system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table -[system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = -true". Other queries do not affect the cache miss counter. Field `query_log_usage` in system table -[system.query_log](system-tables/query_log.md) shows for each ran query whether the query result was written into or read from the query -cache. +`system.query_cache`. The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and +"QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run +with setting `use_query_cache = true`; other queries do not affect "QueryCacheMisses". Field `query_log_usage` in system table +[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the +query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table +[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains. The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be changed (see below) but doing so is not recommended for security reasons. diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index f357341da67..e46b495239c 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 └─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + + ## Metric descriptions @@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b Number of threads in the server of the PostgreSQL compatibility protocol. +### QueryCacheBytes + +Total size of the query cache in bytes. + +### QueryCacheEntries + +Total number of entries in the query cache. 
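+
+As a minimal illustration (nothing is assumed here beyond the two metric names introduced above), both values can be inspected with:
+
+``` sql
+SELECT metric, value
+FROM system.asynchronous_metrics
+WHERE metric IN ('QueryCacheBytes', 'QueryCacheEntries');
+```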
+ ### ReplicasMaxAbsoluteDelay Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index ba5602ee292..7846fe4be5d 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -11,6 +11,8 @@ Columns: - `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. - `description` ([String](../../sql-reference/data-types/string.md)) — Event description. +You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp). + **Example** ``` sql diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 5a7dfd03eb4..b1dcea5500f 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -11,7 +11,7 @@ Columns: - `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. - `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. -The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. +You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp). **Example** diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index a6c509e8bb1..5982a5ade50 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -496,6 +496,16 @@ void QueryCache::reset() cache_size_in_bytes = 0; } +size_t QueryCache::weight() const +{ + return cache.weight(); +} + +size_t QueryCache::count() const +{ + return cache.count(); +} + size_t QueryCache::recordQueryRun(const Key & key) { std::lock_guard lock(mutex); diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c2de8ca22dd..eaa54c503fa 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -186,6 +186,9 @@ public: void reset(); + size_t weight() const; + size_t count() const; + /// Record new execution of query represented by key. Returns number of executions so far. size_t recordQueryRun(const Key & key); @@ -193,7 +196,7 @@ public: std::vector dump() const; private: - Cache cache; + Cache cache; /// has its own locking --> not protected by mutex mutable std::mutex mutex; TimesExecuted times_executed TSA_GUARDED_BY(mutex); diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 0fbcfc9e6a1..68411e80755 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -92,6 +92,12 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values " The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."}; } + if (auto query_cache = getContext()->getQueryCache()) + { + new_values["QueryCacheBytes"] = { query_cache->weight(), "Total size of the query cache in bytes." }; + new_values["QueryCacheEntries"] = { query_cache->count(), "Total number of entries in the query cache." 
}; + } + { auto caches = FileCacheFactory::instance().getAll(); size_t total_bytes = 0; From dfc06d27143da106adc6d5fae3b5be089f1e2d64 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 27 Jul 2023 16:13:29 +0000 Subject: [PATCH 091/151] fix reading of unneded column in case of multistage prewhere --- .../MergeTree/MergeTreeBlockReadUtils.cpp | 6 +++-- ...02833_multiprewhere_extra_column.reference | 2 ++ .../02833_multiprewhere_extra_column.sql | 25 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02833_multiprewhere_extra_column.reference create mode 100644 tests/queries/0_stateless/02833_multiprewhere_extra_column.sql diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index d830ba37e71..48779aa0df7 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -340,8 +340,10 @@ MergeTreeReadTaskColumns getReadTaskColumns( if (!columns_from_previous_steps.contains(name)) step_column_names.push_back(name); - injectRequiredColumns( - data_part_info_for_reader, storage_snapshot, with_subcolumns, step_column_names); + if (!step_column_names.empty()) + injectRequiredColumns( + data_part_info_for_reader, storage_snapshot, + with_subcolumns, step_column_names); /// More columns could have been added, filter them as well by the list of columns from previous steps. Names columns_to_read_in_step; diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.reference b/tests/queries/0_stateless/02833_multiprewhere_extra_column.reference new file mode 100644 index 00000000000..45571c71477 --- /dev/null +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.reference @@ -0,0 +1,2 @@ +10496500 +4 diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql new file mode 100644 index 00000000000..a786de454ed --- /dev/null +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -0,0 +1,25 @@ +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings + +drop table if exists t_multi_prewhere; +drop row policy if exists policy_02834 on t_multi_prewhere; + +create table t_multi_prewhere (a UInt64, b UInt64, c UInt8) +engine = MergeTree order by tuple() +settings min_bytes_for_wide_part = 0; + +create row policy policy_02834 on t_multi_prewhere using a > 2000 as permissive to all; +insert into t_multi_prewhere select number, number, number from numbers(10000); + +system drop mark cache; +select sum(b) from t_multi_prewhere prewhere a < 5000; + +system flush logs; + +select ProfileEvents['FileOpen'] from system.query_log +where + type = 'QueryFinish' + and current_database = currentDatabase() + and query ilike '%select sum(b) from t_multi_prewhere prewhere a < 5000%'; + +drop table if exists t_multi_prewhere; +drop row policy if exists policy_02834 on t_multi_prewhere; From 03f025ddd1352c264ceb716e731efc095f9cc0c3 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 27 Jul 2023 16:38:02 +0000 Subject: [PATCH 092/151] Fix typo --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 5dbe9e350bf..9fdc99bc760 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6530,7 +6530,7 @@ void 
QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif array_join_column_expressions.push_back(std::move(array_join_column_node)); }; - // Support ARRAY JOIN COLUMNS(...). COLUMNS trasformer is resolved to list of columns. + // Support ARRAY JOIN COLUMNS(...). COLUMNS transformer is resolved to list of columns. if (auto * columns_list = array_join_expression->as()) { for (auto & array_join_subexpression : columns_list->getNodes()) From 380da315121078fc3e88a1e038e5aacd296853c2 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 27 Jul 2023 19:03:44 +0200 Subject: [PATCH 093/151] Improvements to backup restore disallow_concurrency test --- .../test_disallow_concurrency.py | 102 +++++++++++++----- 1 file changed, 73 insertions(+), 29 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index a863a6e2047..af1b2656227 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -133,21 +133,31 @@ def test_concurrent_backups_on_same_node(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - try: - error = nodes[0].query_and_get_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) - except Exception as e: + result, error = nodes[0].query_and_get_answer_with_error( + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + ) + + if not error: status = ( nodes[0] .query(f"SELECT status FROM system.backups WHERE id == '{id}'") .rstrip("\n") ) # It is possible that the second backup was picked up first, and then the async backup - if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + if status == "BACKUP_FAILED": + return + elif status == "CREATING_BACKUP": + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE id = '{id}'", + "BACKUP_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent backups both passed, when one is expected to fail") + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -191,20 +201,31 @@ def test_concurrent_backups_on_different_nodes(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - try: - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) - except Exception as e: + ) + + if not error: status = ( nodes[1] .query(f"SELECT status FROM system.backups WHERE id == '{id}'") .rstrip("\n") ) - if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + # It is possible that the second backup was picked up first, and then the async backup + if status == "BACKUP_FAILED": + return + elif status == "CREATING_BACKUP": + assert_eq_with_retry( + nodes[1], + f"SELECT status FROM system.backups WHERE id = '{id}'", + "BACKUP_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent backups both passed, when one is expected to fail") + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -247,20 +268,32 @@ def test_concurrent_restores_on_same_node(): ) assert status in ["RESTORING", "RESTORED"] - try: - error = nodes[0].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) - except Exception as e: + result, error = 
nodes[0].query_and_get_answer_with_error( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" + ) + + if not error: status = ( nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") .rstrip("\n") ) - if status == "RESTORING" or status == "RESTORE_FAILED": + # It is possible that the second backup was picked up first, and then the async backup + if status == "RESTORE_FAILED": + return + elif status == "RESTORING": + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE id == '{restore_id}'", + "RESTORE_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent restores both passed, when one is expected to fail") + + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", @@ -303,20 +336,31 @@ def test_concurrent_restores_on_different_node(): ) assert status in ["RESTORING", "RESTORED"] - try: - error = nodes[1].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) - except Exception as e: + result, error = nodes[1].query_and_get_answer_with_error( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" + ) + + if not error: status = ( nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") .rstrip("\n") ) - if status == "RESTORING" or status == "RESTORE_FAILED": + # It is possible that the second backup was picked up first, and then the async backup + if status == "RESTORE_FAILED": + return + elif status == "RESTORING": + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE id == '{restore_id}'", + "RESTORE_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent restores both passed, when one is expected to fail") + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", From a8a48af7cbd20334af531e9f71e7f6005a098db1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 27 Jul 2023 19:17:57 +0200 Subject: [PATCH 094/151] Fix 02417_opentelemetry_insert_on_distributed_table flakiness Looks like everything is OK with opentelemetry, and the reason of the flakiness is this: $ gg opentelemetry_start_trace_probability tests/**.xml tests/config/users.d/opentelemetry.xml: 0.1 So let's simply disable it. And also let's stop the distributed sends to increase the failure rate if there is some problem left. 
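A rough SQL equivalent of what the flush does now (a sketch only; the table
name is the one created by this test, and the shell test passes the setting
on the client command line instead of SET):

    -- disable probabilistic tracing for the session, then flush
    SET opentelemetry_start_trace_probability = 0;
    SYSTEM FLUSH DISTRIBUTED dist_opentelemetry;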
Signed-off-by: Azat Khuzhin --- .../02417_opentelemetry_insert_on_distributed_table.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh index edc3d06e5bf..5a1e33a8459 100755 --- a/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh +++ b/tests/queries/0_stateless/02417_opentelemetry_insert_on_distributed_table.sh @@ -20,7 +20,9 @@ function insert() -H "tracestate: $4" \ "${CLICKHOUSE_URL}" \ --data @- - ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH DISTRIBUTED ${CLICKHOUSE_DATABASE}.dist_opentelemetry" + + # disable probabilistic tracing to avoid stealing the trace context + ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=0 -q "SYSTEM FLUSH DISTRIBUTED ${CLICKHOUSE_DATABASE}.dist_opentelemetry" } function check_span() @@ -69,6 +71,8 @@ DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.local_opentelemetry; CREATE TABLE ${CLICKHOUSE_DATABASE}.dist_opentelemetry (key UInt64) Engine=Distributed('test_cluster_two_shards_localhost', ${CLICKHOUSE_DATABASE}, local_opentelemetry, key % 2); CREATE TABLE ${CLICKHOUSE_DATABASE}.local_opentelemetry (key UInt64) Engine=MergeTree ORDER BY key; + +SYSTEM STOP DISTRIBUTED SENDS ${CLICKHOUSE_DATABASE}.dist_opentelemetry; " # From 2717be7c476d9d7c3be841bbefdcd8688a76e7fe Mon Sep 17 00:00:00 2001 From: Anton Kozlov Date: Thu, 27 Jul 2023 17:34:53 +0000 Subject: [PATCH 095/151] [minor][bugfix] fix connected_zk_index column of system.zookeeper_connection table --- src/Common/ZooKeeper/IKeeper.h | 2 -- src/Common/ZooKeeper/TestKeeper.h | 3 --- src/Common/ZooKeeper/ZooKeeper.cpp | 26 ++++++-------------------- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 14 ++++++++------ src/Common/ZooKeeper/ZooKeeperImpl.h | 8 ++++---- 5 files changed, 18 insertions(+), 35 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 5240acc2616..2b2a043d389 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -490,8 +490,6 @@ public: /// Useful to check owner of ephemeral node. virtual int64_t getSessionID() const = 0; - virtual Poco::Net::SocketAddress getConnectedAddress() const = 0; - /// If the method will throw an exception, callbacks won't be called. 
/// /// After the method is executed successfully, you must wait for callbacks diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 8615ed0fb77..69840cbeff6 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -40,7 +40,6 @@ public: bool isExpired() const override { return expired; } int64_t getSessionID() const override { return 0; } - Poco::Net::SocketAddress getConnectedAddress() const override { return connected_zk_address; } void create( @@ -135,8 +134,6 @@ private: zkutil::ZooKeeperArgs args; - Poco::Net::SocketAddress connected_zk_address; - std::mutex push_request_mutex; std::atomic expired{false}; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 7a8088c960b..0fe536b1a08 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -112,31 +112,17 @@ void ZooKeeper::init(ZooKeeperArgs args_) throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZCONNECTIONLOSS); } - impl = std::make_unique(nodes, args, zk_log); + impl = std::make_unique(nodes, args, zk_log, [this](size_t node_idx, const Coordination::ZooKeeper::Node & node) + { + connected_zk_host = node.address.host().toString(); + connected_zk_port = node.address.port(); + connected_zk_index = node_idx; + }); if (args.chroot.empty()) LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ",")); else LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); - - Poco::Net::SocketAddress address = impl->getConnectedAddress(); - - connected_zk_host = address.host().toString(); - connected_zk_port = address.port(); - - connected_zk_index = 0; - - if (args.hosts.size() > 1) - { - for (size_t i = 0; i < args.hosts.size(); i++) - { - if (args.hosts[i] == address.toString()) - { - connected_zk_index = i; - break; - } - } - } } else if (args.implementation == "testkeeper") { diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 5e16a437be3..74b0b039ca9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -313,8 +313,8 @@ ZooKeeper::~ZooKeeper() ZooKeeper::ZooKeeper( const Nodes & nodes, const zkutil::ZooKeeperArgs & args_, - std::shared_ptr zk_log_) - : args(args_) + std::shared_ptr zk_log_, std::optional && connected_callback_) + : args(args_), connected_callback(std::move(connected_callback_)) { log = &Poco::Logger::get("ZooKeeperClient"); std::atomic_store(&zk_log, std::move(zk_log_)); @@ -395,8 +395,9 @@ void ZooKeeper::connect( WriteBufferFromOwnString fail_reasons; for (size_t try_no = 0; try_no < num_tries; ++try_no) { - for (const auto & node : nodes) + for (size_t i = 0; i < nodes.size(); ++i) { + const auto & node = nodes[i]; try { /// Reset the state of previous attempt. @@ -443,9 +444,11 @@ void ZooKeeper::connect( e.addMessage("while receiving handshake from ZooKeeper"); throw; } - connected = true; - connected_zk_address = node.address; + + if (connected_callback.has_value()) + (*connected_callback)(i, node); + break; } @@ -462,7 +465,6 @@ void ZooKeeper::connect( if (!connected) { WriteBufferFromOwnString message; - connected_zk_address = Poco::Net::SocketAddress(); message << "All connection tries failed while connecting to ZooKeeper. 
nodes: "; bool first = true; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 7e27608d0a1..3684b215144 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -107,6 +107,7 @@ public: }; using Nodes = std::vector; + using ConnectedCallback = std::function; /** Connection to nodes is performed in order. If you want, shuffle them manually. * Operation timeout couldn't be greater than session timeout. @@ -115,7 +116,8 @@ public: ZooKeeper( const Nodes & nodes, const zkutil::ZooKeeperArgs & args_, - std::shared_ptr zk_log_); + std::shared_ptr zk_log_, + std::optional && connected_callback_ = {}); ~ZooKeeper() override; @@ -126,8 +128,6 @@ public: /// Useful to check owner of ephemeral node. int64_t getSessionID() const override { return session_id; } - Poco::Net::SocketAddress getConnectedAddress() const override { return connected_zk_address; } - void executeGenericRequest( const ZooKeeperRequestPtr & request, ResponseCallback callback); @@ -213,9 +213,9 @@ public: private: ACLs default_acls; - Poco::Net::SocketAddress connected_zk_address; zkutil::ZooKeeperArgs args; + std::optional connected_callback = {}; /// Fault injection void maybeInjectSendFault(); From b95745d916330abb3306016d512b40d3d24616dd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 17:52:44 +0000 Subject: [PATCH 096/151] fix: check positional options --- src/Client/ClientBase.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 496fc8fce0a..06dabf96c28 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2624,6 +2624,10 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); } + /// Check positional options. 
+ if (std::ranges::count_if(parsed.options, [](const auto & op){ return !op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--"); }) > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); + po::store(parsed, options); } From 9340f02d26ae7f170611ea9b19a11e720b41b765 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jul 2023 18:33:07 +0000 Subject: [PATCH 097/151] Silence spell check --- .../aspell-ignore/en/aspell-dict.txt | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a314815e2c4..80aeadd8738 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -211,7 +211,6 @@ Decrypted Deduplicate Deduplication DelayedInserts -delim DeliveryTag DeltaLake Denormalize @@ -699,6 +698,8 @@ PyCharm QEMU QTCreator Quantile +QueryCacheBytes +QueryCacheEntries QueryCacheHits QueryCacheMisses QueryPreempted @@ -761,9 +762,9 @@ RoaringBitmap RocksDB Rollup RowBinary +RowBinaryWithDefaults RowBinaryWithNames RowBinaryWithNamesAndTypes -RowBinaryWithDefaults Runtime SATA SELECTs @@ -776,7 +777,6 @@ SMALLINT SPNEGO SQEs SQLAlchemy -SquaredDistance SQLConsoleDetail SQLInsert SQLSTATE @@ -811,6 +811,7 @@ Smirnov'test Soundex SpanKind Spearman's +SquaredDistance StartTLS StartTime StartupSystemTables @@ -838,8 +839,6 @@ Subexpression Submodules Subqueries Substrings -substringIndex -substringIndexUTF SummingMergeTree SuperSet Superset @@ -1272,6 +1271,7 @@ cryptographic csv csvwithnames csvwithnamesandtypes +curdate currentDatabase currentProfiles currentRoles @@ -1331,6 +1331,7 @@ defaultProfiles defaultRoles defaultValueOfArgumentType defaultValueOfTypeName +delim deltaLake deltaSum deltaSumTimestamp @@ -1542,13 +1543,13 @@ hadoop halfMD halfday hardlinks +hasAll +hasAny +hasColumnInTable hasSubsequence hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF -hasAll -hasAny -hasColumnInTable hasSubstr hasToken hasTokenCaseInsensitive @@ -1590,10 +1591,10 @@ incrementing indexHint indexOf infi -initialQueryID -initializeAggregation initcap initcapUTF +initialQueryID +initializeAggregation injective innogames inodes @@ -2131,9 +2132,9 @@ routineley rowNumberInAllBlocks rowNumberInBlock rowbinary +rowbinarywithdefaults rowbinarywithnames rowbinarywithnamesandtypes -rowbinarywithdefaults rsync rsyslog runnable @@ -2185,8 +2186,8 @@ sleepEachRow snowflakeToDateTime socketcache soundex -sparkbar sparkBar +sparkbar sparsehash speedscope splitByChar @@ -2256,6 +2257,8 @@ subreddits subseconds subsequence substring +substringIndex +substringIndexUTF substringUTF substrings subtitiles @@ -2556,4 +2559,3 @@ znode znodes zookeeperSessionUptime zstd -curdate From 5942c80faed38febea5394526b5e5c670b03bd4d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 19:11:44 +0000 Subject: [PATCH 098/151] fix test: '--option' now is allowed after terminating '--' --- .../02096_bad_options_in_client_and_local.reference | 2 -- .../0_stateless/02096_bad_options_in_client_and_local.sh | 4 ---- 2 files changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference index c4c0901b9df..432299e9556 100644 --- a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference +++ 
b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference @@ -8,5 +8,3 @@ OK OK OK OK -OK -OK diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh index d37155e8506..753d56fb424 100755 --- a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh @@ -9,8 +9,6 @@ ${CLICKHOUSE_LOCAL} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" ${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" -${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" - ${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" ${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" @@ -22,8 +20,6 @@ ${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" ${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" -${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" - ${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" ${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" From eb74e658b7dac9cbfbd4027d2281ab4fc5c173a1 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 27 Jul 2023 19:12:06 +0000 Subject: [PATCH 099/151] Fix build --- src/Server/ServerType.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h index 8fb81622ab3..1fab492222a 100644 --- a/src/Server/ServerType.h +++ b/src/Server/ServerType.h @@ -33,7 +33,8 @@ public: static const char * serverTypeToString(Type type); - bool shouldStart(Type server_type, const std::string & custom_name_ = "") const; + /// Checks whether provided in the arguments type should be started or stopped based on current server type. + bool shouldStart(Type server_type, const std::string & server_custom_name = "") const; bool shouldStop(const std::string & port_name) const; Type type; From dba8b445bd37b2fb9fb4983e0a3f740649dcbb5b Mon Sep 17 00:00:00 2001 From: Jai Jhala Date: Thu, 27 Jul 2023 12:32:53 -0700 Subject: [PATCH 100/151] Update default output_format_arrow_compression.md Updates the default parameter of output_format_arrow_compression_method from "none" to "lz4_frame". --- docs/en/operations/settings/settings-formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index ee8e0d547b8..fb10ff7f61b 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1164,7 +1164,7 @@ Enabled by default. Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) -Default value: `none`. +Default value: `lz4_frame`. 
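+
+A hypothetical usage example (the query below is illustrative, not part of the original page):
+
+```sql
+SET output_format_arrow_compression_method = 'zstd';
+SELECT number FROM system.numbers LIMIT 10 FORMAT Arrow;
+```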
 ## ORC format settings {#orc-format-settings}

From 4629ab1df1cf6e5de3577ff94f7bd600207f24b6 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 27 Jul 2023 23:44:32 +0400
Subject: [PATCH 101/151] add test with broken pipe

---
 .../integration/helpers/s3_mocks/broken_s3.py | 80 +++++++++++++------
 .../test_checking_s3_blobs_paranoid/test.py   | 77 ++++++++++++++++--
 2 files changed, 126 insertions(+), 31 deletions(-)

diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py
index 6e1572af262..206f960293f 100644
--- a/tests/integration/helpers/s3_mocks/broken_s3.py
+++ b/tests/integration/helpers/s3_mocks/broken_s3.py
@@ -37,9 +37,7 @@ class MockControl:
         )
         assert response == "OK", response

-    def setup_action(
-        self, when, count=None, after=None, action="error_500", action_args=None
-    ):
+    def setup_action(self, when, count=None, after=None, action=None, action_args=None):
        url = f"http://localhost:{self._port}/mock_settings/{when}?nothing=1"

        if count is not None:
@@ -128,8 +126,14 @@ class MockControl:
 class _ServerRuntime:
     class SlowPut:
         def __init__(
-            self, probability_=None, timeout_=None, minimal_length_=None, count_=None
+            self,
+            lock,
+            probability_=None,
+            timeout_=None,
+            minimal_length_=None,
+            count_=None,
         ):
+            self.lock = lock
             self.probability = probability_ if probability_ is not None else 1
             self.timeout = timeout_ if timeout_ is not None else 0.1
             self.minimal_length = minimal_length_ if minimal_length_ is not None else 0
@@ -144,14 +148,15 @@ class _ServerRuntime:
             )

         def get_timeout(self, content_length):
-            if content_length > self.minimal_length:
-                if self.count > 0:
-                    if (
-                        _runtime.slow_put.probability == 1
-                        or random.random() <= _runtime.slow_put.probability
-                    ):
-                        self.count -= 1
-                        return _runtime.slow_put.timeout
+            with self.lock:
+                if content_length > self.minimal_length:
+                    if self.count > 0:
+                        if (
+                            _runtime.slow_put.probability == 1
+                            or random.random() <= _runtime.slow_put.probability
+                        ):
+                            self.count -= 1
+                            return _runtime.slow_put.timeout
             return None

     class Expected500ErrorAction:
@@ -199,29 +204,48 @@ class _ServerRuntime:
             )
             request_handler.connection.close()

+    class BrokenPipeAction:
+        def inject_error(self, request_handler):
+            # Read the request body only partially, then close the socket abruptly,
+            # so the client observes a broken pipe on its next write.
+            request_handler.rfile.read(50)
+
+            time.sleep(1)
+            request_handler.connection.setsockopt(
+                socket.SOL_SOCKET, socket.SO_LINGER, struct.pack("ii", 1, 0)
+            )
+            request_handler.connection.close()
+
     class ConnectionRefusedAction(RedirectAction):
         pass

     class CountAfter:
-        def __init__(self, count_=None, after_=None, action_=None, action_args_=[]):
+        def __init__(
+            self, lock, count_=None, after_=None, action_=None, action_args_=[]
+        ):
+            self.lock = lock
+
             self.count = count_ if count_ is not None else INF_COUNT
             self.after = after_ if after_ is not None else 0
             self.action = action_
             self.action_args = action_args_
+
             if self.action == "connection_refused":
                 self.error_handler = _ServerRuntime.ConnectionRefusedAction()
             elif self.action == "connection_reset_by_peer":
                 self.error_handler = _ServerRuntime.ConnectionResetByPeerAction(
                     *self.action_args
                 )
+            elif self.action == "broken_pipe":
+                self.error_handler = _ServerRuntime.BrokenPipeAction()
             elif self.action == "redirect_to":
                 self.error_handler = _ServerRuntime.RedirectAction(*self.action_args)
             else:
                 self.error_handler = _ServerRuntime.Expected500ErrorAction()

         @staticmethod
-        def from_cgi_params(params):
+        def from_cgi_params(lock, params):
             return _ServerRuntime.CountAfter(
+                lock=lock,
                 count_=_and_then(params.get("count",
[None])[0], int), after_=_and_then(params.get("after", [None])[0], int), action_=params.get("action", [None])[0], @@ -232,13 +256,14 @@ class _ServerRuntime: return f"count:{self.count} after:{self.after} action:{self.action} action_args:{self.action_args}" def has_effect(self): - if self.after: - self.after -= 1 - if self.after == 0: - if self.count: - self.count -= 1 - return True - return False + with self.lock: + if self.after: + self.after -= 1 + if self.after == 0: + if self.count: + self.count -= 1 + return True + return False def inject_error(self, request_handler): self.error_handler.inject_error(request_handler) @@ -397,14 +422,16 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - _runtime.at_part_upload = _ServerRuntime.CountAfter.from_cgi_params(params) + _runtime.at_part_upload = _ServerRuntime.CountAfter.from_cgi_params( + _runtime.lock, params + ) self.log_message("set at_part_upload %s", _runtime.at_part_upload) return self._ok() if path[1] == "at_object_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.at_object_upload = _ServerRuntime.CountAfter.from_cgi_params( - params + _runtime.lock, params ) self.log_message("set at_object_upload %s", _runtime.at_object_upload) return self._ok() @@ -420,6 +447,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if path[1] == "slow_put": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.slow_put = _ServerRuntime.SlowPut( + lock=_runtime.lock, minimal_length_=_and_then(params.get("minimal_length", [None])[0], int), probability_=_and_then(params.get("probability", [None])[0], float), timeout_=_and_then(params.get("timeout", [None])[0], float), @@ -436,7 +464,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if path[1] == "at_create_multi_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.at_create_multi_part_upload = ( - _ServerRuntime.CountAfter.from_cgi_params(params) + _ServerRuntime.CountAfter.from_cgi_params(_runtime.lock, params) ) self.log_message( "set at_create_multi_part_upload %s", @@ -477,7 +505,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if upload_id is not None: if _runtime.at_part_upload is not None: self.log_message( - "put error_at_object_upload %s, %s, %s", + "put at_part_upload %s, %s, %s", _runtime.at_part_upload, upload_id, parts, @@ -492,7 +520,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if _runtime.at_object_upload is not None: if _runtime.at_object_upload.has_effect(): self.log_message( - "put error_at_object_upload %s, %s, %s", + "put error_at_object_upload %s, %s", _runtime.at_object_upload, parts, ) diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 28b0c9beeaa..c40e2a31a8b 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -41,11 +41,6 @@ def broken_s3(init_broken_s3): yield init_broken_s3 -@pytest.fixture(scope="module") -def init_connection_reset_by_peer(cluster): - yield start_s3_mock(cluster, "connection_reset_by_peer", "8084") - - def test_upload_after_check_works(cluster, broken_s3): node = cluster.instances["node"] @@ -397,3 +392,75 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( or "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 104, Connection reset by peer" in error ), error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 7 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Poco::Exception. Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error + ), error From 7d8dc92ed0522e309760037720f6fd8fb3f2542d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 27 Jul 2023 21:07:54 +0000 Subject: [PATCH 102/151] Automatic style fix --- .../test_disallow_concurrency.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index af1b2656227..5c3f06a9d9d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -156,7 +156,9 @@ def test_concurrent_backups_on_same_node(): ) return else: - raise Exception("Concurrent backups both passed, when one is expected to fail") + raise Exception( + "Concurrent backups both passed, when one is expected to fail" + ) expected_errors = [ "Concurrent backups not supported", @@ -202,7 +204,7 @@ def test_concurrent_backups_on_different_nodes(): assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] result, error = nodes[0].query_and_get_answer_with_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) if not error: @@ -224,7 +226,9 @@ def test_concurrent_backups_on_different_nodes(): ) return else: - raise Exception("Concurrent backups both passed, when one is expected to fail") + raise Exception( + "Concurrent backups both passed, when one is expected to fail" + ) expected_errors = [ "Concurrent backups not supported", @@ -291,8 +295,9 @@ def test_concurrent_restores_on_same_node(): ) return else: - raise Exception("Concurrent restores both passed, when one is expected to fail") - + raise Exception( + "Concurrent restores both 
passed, when one is expected to fail"
+            )

     expected_errors = [
         "Concurrent restores not supported",
@@ -359,7 +364,9 @@ def test_concurrent_restores_on_different_node():
             )
             return
         else:
-            raise Exception("Concurrent restores both passed, when one is expected to fail")
+            raise Exception(
+                "Concurrent restores both passed, when one is expected to fail"
+            )

     expected_errors = [
         "Concurrent restores not supported",

From 18c1fd6f08cc2be964ed15604c26a70d7d168561 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Thu, 27 Jul 2023 21:24:39 +0000
Subject: [PATCH 103/151] Refactor InDepthQueryTreeVisitorWithContext

---
 src/Analyzer/InDepthQueryTreeVisitor.h        | 158 +++---------------
 ...egateFunctionsArithmericOperationsPass.cpp |   8 +-
 src/Analyzer/Passes/ArrayExistsToHasPass.cpp  |   2 +-
 src/Analyzer/Passes/AutoFinalOnQueryPass.cpp  |   2 +-
 .../Passes/ConvertOrLikeChainPass.cpp         |   2 +-
 src/Analyzer/Passes/ConvertQueryToCNFPass.cpp |   2 +-
 src/Analyzer/Passes/CountDistinctPass.cpp     |  34 ++--
 src/Analyzer/Passes/CrossToInnerJoinPass.cpp  |   2 +-
 .../Passes/FunctionToSubcolumnsPass.cpp       |   2 +-
 src/Analyzer/Passes/FuseFunctionsPass.cpp     |   2 +-
 .../Passes/GroupingFunctionsResolvePass.cpp   |   2 +-
 src/Analyzer/Passes/IfChainToMultiIfPass.cpp  |   2 +-
 .../Passes/IfTransformStringsToEnumPass.cpp   |   2 +-
 .../Passes/LogicalExpressionOptimizerPass.cpp |   2 +-
 src/Analyzer/Passes/MultiIfToIfPass.cpp       |   2 +-
 .../Passes/NormalizeCountVariantsPass.cpp     |   2 +-
 .../OptimizeGroupByFunctionKeysPass.cpp       |   2 +-
 ...ptimizeRedundantFunctionsInOrderByPass.cpp |   2 +-
 .../RewriteAggregateFunctionWithIfPass.cpp    |   2 +-
 .../Passes/ShardNumColumnToFunctionPass.cpp   |   2 +-
 src/Analyzer/Passes/SumIfToCountIfPass.cpp    |   2 +-
 .../UniqInjectiveFunctionsEliminationPass.cpp |   2 +-
 src/Storages/buildQueryTreeForShard.cpp       |   2 +-
 23 files changed, 55 insertions(+), 185 deletions(-)

diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h
index be3a760d4e6..59ee57996c4 100644
--- a/src/Analyzer/InDepthQueryTreeVisitor.h
+++ b/src/Analyzer/InDepthQueryTreeVisitor.h
@@ -91,26 +91,25 @@ private:
 template <typename Derived>
 using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor<Derived, true /*const_visitor*/>;

-/** Same as InDepthQueryTreeVisitor and additionally keeps track of current scope context.
+/** Same as InDepthQueryTreeVisitor (but has a different interface) and additionally keeps track of current scope context.
  * This can be useful if your visitor has special logic that depends on current scope context.
+ *
+ * To specify the behavior of the visitor you can implement the following methods in the derived class:
+ * 1. needChildVisit – This method allows skipping a subtree.
+ * 2. enterImpl – This method is called before children are processed.
+ * 3. leaveImpl – This method is called after children are processed.
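+ *
+ * A minimal sketch of a derived visitor (the class and member names here are illustrative only):
+ *
+ *     class CollectFunctionNamesVisitor : public InDepthQueryTreeVisitorWithContext<CollectFunctionNamesVisitor>
+ *     {
+ *     public:
+ *         using Base = InDepthQueryTreeVisitorWithContext<CollectFunctionNamesVisitor>;
+ *         using Base::Base;
+ *
+ *         void enterImpl(QueryTreeNodePtr & node)
+ *         {
+ *             if (auto * function_node = node->as<FunctionNode>())
+ *                 function_names.push_back(function_node->getFunctionName());
+ *         }
+ *
+ *         Names function_names;
+ *     };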
*/ template class InDepthQueryTreeVisitorWithContext { public: - using VisitQueryTreeNodeType = std::conditional_t; + using VisitQueryTreeNodeType = QueryTreeNodePtr; explicit InDepthQueryTreeVisitorWithContext(ContextPtr context, size_t initial_subquery_depth = 0) : current_context(std::move(context)) , subquery_depth(initial_subquery_depth) {} - /// Return true if visitor should traverse tree top to bottom, false otherwise - bool shouldTraverseTopToBottom() const - { - return true; - } - /// Return true if visitor should visit child, false otherwise bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) { @@ -147,18 +146,16 @@ public: ++subquery_depth; - bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom(); - if (!traverse_top_to_bottom) - visitChildren(query_tree_node); + getDerived().enterImpl(query_tree_node); - getDerived().visitImpl(query_tree_node); - - if (traverse_top_to_bottom) - visitChildren(query_tree_node); + visitChildren(query_tree_node); getDerived().leaveImpl(query_tree_node); } + void enterImpl(VisitQueryTreeNodeType & node [[maybe_unused]]) + {} + void leaveImpl(VisitQueryTreeNodeType & node [[maybe_unused]]) {} private: @@ -172,85 +169,15 @@ private: return *static_cast(this); } - void visitChildren(VisitQueryTreeNodeType & expression) + bool shouldSkipSubtree( + VisitQueryTreeNodeType & parent, + VisitQueryTreeNodeType & child, + size_t subtree_index) { - for (auto & child : expression->getChildren()) - { - if (!child) - continue; + bool need_visit_child = getDerived().needChildVisit(parent, child); + if (!need_visit_child) + return true; - bool need_visit_child = getDerived().needChildVisit(expression, child); - - if (need_visit_child) - visit(child); - } - } - - ContextPtr current_context; - size_t subquery_depth = 0; -}; - -template -using ConstInDepthQueryTreeVisitorWithContext = InDepthQueryTreeVisitorWithContext; - -/** Visitor that use another visitor to visit node only if condition for visiting node is true. - * For example, your visitor need to visit only query tree nodes or union nodes. 
- * - * Condition interface: - * struct Condition - * { - * bool operator()(VisitQueryTreeNodeType & node) - * { - * return shouldNestedVisitorVisitNode(node); - * } - * } - */ -template -class InDepthQueryTreeConditionalVisitor : public InDepthQueryTreeVisitor, const_visitor> -{ -public: - using Base = InDepthQueryTreeVisitor, const_visitor>; - using VisitQueryTreeNodeType = typename Base::VisitQueryTreeNodeType; - - explicit InDepthQueryTreeConditionalVisitor(Visitor & visitor_, Condition & condition_) - : visitor(visitor_) - , condition(condition_) - { - } - - bool shouldTraverseTopToBottom() const - { - return visitor.shouldTraverseTopToBottom(); - } - - void visitImpl(VisitQueryTreeNodeType & query_tree_node) - { - if (condition(query_tree_node)) - visitor.visit(query_tree_node); - } - - Visitor & visitor; - Condition & condition; -}; - -template -using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor; - -template -class QueryTreeVisitor -{ -public: - explicit QueryTreeVisitor(ContextPtr context_) - : current_context(std::move(context_)) - {} - - bool needApply(QueryTreeNodePtr & node) - { - return getImpl().needApply(node); - } - - bool shouldSkipSubtree(QueryTreeNodePtr & parent, size_t subtree_index) - { if (auto * table_function_node = parent->as()) { const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes(); @@ -259,58 +186,19 @@ public: return false; } - void visit(QueryTreeNodePtr & node) - { - auto current_scope_context_ptr = current_context; - SCOPE_EXIT( - current_context = std::move(current_scope_context_ptr); - ); - - if (auto * query_node = node->template as()) - current_context = query_node->getContext(); - else if (auto * union_node = node->template as()) - current_context = union_node->getContext(); - - if (!TOP_TO_BOTTOM) - visitChildren(node); - - if (needApply(node)) - getImpl().apply(node); - - if (TOP_TO_BOTTOM) - visitChildren(node); - } - - const ContextPtr & getContext() const - { - return current_context; - } - - const Settings & getSettings() const - { - return current_context->getSettingsRef(); - } -private: - - Impl & getImpl() - { - return *static_cast(this); - } - - void visitChildren(QueryTreeNodePtr & node) + void visitChildren(VisitQueryTreeNodeType & expression) { size_t index = 0; - for (auto & child : node->getChildren()) + for (auto & child : expression->getChildren()) { - if (child && !shouldSkipSubtree(node, index)) + if (child && !shouldSkipSubtree(expression, child, index)) visit(child); ++index; } } - static constexpr bool TOP_TO_BOTTOM = Impl::TOP_TO_BOTTOM; - ContextPtr current_context; + size_t subquery_depth = 0; }; } diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 1476a66c892..3615a632374 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -51,13 +51,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - /// Traverse tree bottom to top - static bool shouldTraverseTopToBottom() - { - return false; - } - - void visitImpl(QueryTreeNodePtr & node) + void leaveImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions) return; diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index c0f958588f1..a95bcea4fac 100644 --- 
a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -22,7 +22,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_rewrite_array_exists_to_has) return; diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 15326ca1dc8..2c89ec9dc20 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -20,7 +20,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().final) return; diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 7d7362fb742..1fada88a21c 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -50,7 +50,7 @@ public: && settings.max_hyperscan_regexp_total_length == 0; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); if (!function_node || function_node->getFunctionName() != "or") diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 4d32c96b845..724448ad742 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -688,7 +688,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * query_node = node->as(); if (!query_node) diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 38f7d07d052..dc58747221e 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -16,17 +16,16 @@ namespace DB namespace { -class CountDistinctVisitor : public QueryTreeVisitor +class CountDistinctVisitor : public InDepthQueryTreeVisitorWithContext { public: - using QueryTreeVisitor::QueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; - static constexpr bool TOP_TO_BOTTOM = true; - - bool needApply(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().count_distinct_optimization) - return false; + return; auto * query_node = node->as(); @@ -34,43 +33,32 @@ public: if (!query_node || (query_node->hasWith() || query_node->hasPrewhere() || query_node->hasWhere() || query_node->hasGroupBy() || query_node->hasHaving() || query_node->hasWindow() || query_node->hasOrderBy() || query_node->hasLimitByLimit() || query_node->hasLimitByOffset() || query_node->hasLimitBy() || query_node->hasLimit() || query_node->hasOffset())) - return false; + return; /// Check that query has only single table expression auto join_tree_node_type = query_node->getJoinTree()->getNodeType(); if (join_tree_node_type == QueryTreeNodeType::JOIN || join_tree_node_type == QueryTreeNodeType::ARRAY_JOIN) - return false; + return; /// Check that query has only single node in projection auto & projection_nodes = query_node->getProjection().getNodes(); if (projection_nodes.size() != 1) - return false; + return; /// Check that query single projection node is `countDistinct` function auto & projection_node = projection_nodes[0]; auto * function_node 
= projection_node->as(); if (!function_node) - return false; + return; auto lower_function_name = Poco::toLower(function_node->getFunctionName()); if (lower_function_name != "countdistinct" && lower_function_name != "uniqexact") - return false; + return; /// Check that `countDistinct` function has single COLUMN argument auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); if (count_distinct_arguments_nodes.size() != 1 && count_distinct_arguments_nodes[0]->getNodeType() != QueryTreeNodeType::COLUMN) - return false; - - return true; - } - - void apply(QueryTreeNodePtr & node) - { - auto * query_node = node->as(); - auto & projection_nodes = query_node->getProjection().getNodes(); - auto * function_node = projection_nodes[0]->as(); - - auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); + return; auto & count_distinct_argument_column = count_distinct_arguments_nodes[0]; auto & count_distinct_argument_column_typed = count_distinct_argument_column->as(); diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index d4877d23f28..b5ece1a4c49 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -193,7 +193,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!isEnabled()) return; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 696483862e0..cd635f87e0e 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -29,7 +29,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { if (!getSettings().optimize_functions_to_subcolumns) return; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 14082697955..2cb7afa4ad6 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -37,7 +37,7 @@ public: , names_to_collect(names_to_collect_) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_syntax_fuse_functions) return; diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp index 0cf5310a3ad..577bca8d1ae 100644 --- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp +++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp @@ -46,7 +46,7 @@ public: { } - void visitImpl(const QueryTreeNodePtr & node) + void enterImpl(const QueryTreeNodePtr & node) { auto * function_node = node->as(); if (!function_node || function_node->getFunctionName() != "grouping") diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index 1f97e012331..b0018d474d5 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -23,7 +23,7 @@ public: , multi_if_function_ptr(std::move(multi_if_function_ptr_)) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_if_chain_to_multiif) return; diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 562aff4cf05..901867b8889 100644 --- 
a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -113,7 +113,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_if_transform_strings_to_enum) return; diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 13f8025f5ea..46056aeaf6f 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -19,7 +19,7 @@ public: : Base(std::move(context)) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 4672351bcfb..85dd33af8bb 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -21,7 +21,7 @@ public: , if_function_ptr(std::move(if_function_ptr_)) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_multiif_to_if) return; diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index d36be98751c..c85b863a203 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -20,7 +20,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_normalize_count_variants) return; diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp index 5ed52f1210b..2e3f207fdeb 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp @@ -26,7 +26,7 @@ public: return !child->as(); } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_group_by_function_keys) return; diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index c6d312d0ecf..875d0c8b5fb 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -28,7 +28,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_redundant_functions_in_order_by) return; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index de264948d4c..38f2fbfa274 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -26,7 +26,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_rewrite_aggregate_function_with_if) return; diff --git a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp index b28816e8ff3..52c30b7b35d 100644 --- a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp +++ 
b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp @@ -24,7 +24,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * column_node = node->as(); if (!column_node) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index d55af278152..cff9ba1111c 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -26,7 +26,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_rewrite_sum_if_to_count_if) return; diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 5c4484457e8..179bd1c38e4 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -31,7 +31,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_injective_functions_inside_uniq) return; diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 1ee7d747fcc..9929b5bb39b 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -130,7 +130,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); auto * join_node = node->as(); From 6573ba537819ce03dd644ff02bdf7341bcc26d58 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 27 Jul 2023 19:37:28 -0400 Subject: [PATCH 104/151] Temporary returning metadata_cache.xml into tests config --- tests/config/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/install.sh b/tests/config/install.sh index 9aaadbc74a5..50f2627d37c 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -34,6 +34,7 @@ ln -sf $SRC_PATH/config.d/keeper_port.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/lost_forever_check.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/metadata_cache.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/prometheus.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ From b3351bb547b8753b405d820925f8f4270be6132d Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Fri, 28 Jul 2023 03:36:23 +0000 Subject: [PATCH 105/151] partially fixed 01747_system_session_log_long test --- src/Core/PostgreSQLProtocol.h | 49 +++-- tests/config/users.d/session_log_test.xml | 2 +- .../01747_system_session_log_long.reference | 198 ++++++++++++------ .../01747_system_session_log_long.sh | 119 ++++++----- 4 files changed, 224 insertions(+), 144 deletions(-) rename tests/queries/{bugs => 0_stateless}/01747_system_session_log_long.reference (73%) rename tests/queries/{bugs => 0_stateless}/01747_system_session_log_long.sh (78%) diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index 8c0654b559f..b0d7646a5f7 
100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -805,20 +805,9 @@ protected: const String & user_name, const String & password, Session & session, - Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - try - { - session.authenticate(user_name, password, address); - } - catch (const Exception &) - { - mt.send( - Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), - true); - throw; - } + session.authenticate(user_name, password, address); } public: @@ -839,10 +828,10 @@ public: void authenticate( const String & user_name, Session & session, - Messaging::MessageTransport & mt, + [[maybe_unused]] Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) override { - return setPassword(user_name, "", session, mt, address); + return setPassword(user_name, "", session, address); } AuthenticationType getType() const override @@ -866,7 +855,7 @@ public: if (type == Messaging::FrontMessageType::PASSWORD_MESSAGE) { std::unique_ptr password = mt.receive(); - return setPassword(user_name, password->password, session, mt, address); + return setPassword(user_name, password->password, session, address); } else throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, @@ -901,20 +890,30 @@ public: Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - const AuthenticationType user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); - if (type_to_method.find(user_auth_type) != type_to_method.end()) + AuthenticationType user_auth_type; + try { - type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); - mt.send(Messaging::AuthenticationOk(), true); - LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); - return; + user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); + if (type_to_method.find(user_auth_type) != type_to_method.end()) + { + type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); + mt.send(Messaging::AuthenticationOk(), true); + LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); + return; + } + } + catch (const Exception&) + { + mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), + true); + + throw; } - mt.send( - Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), - true); + mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), + true); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication type {} is not supported.", user_auth_type); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication method is not supported: {}", user_auth_type); } }; } diff --git a/tests/config/users.d/session_log_test.xml b/tests/config/users.d/session_log_test.xml index daddaa6e4b9..cc2c2c5fcde 100644 --- a/tests/config/users.d/session_log_test.xml +++ b/tests/config/users.d/session_log_test.xml @@ -17,7 +17,7 @@ - + ::1 127.0.0.1 diff --git a/tests/queries/bugs/01747_system_session_log_long.reference b/tests/queries/0_stateless/01747_system_session_log_long.reference similarity index 73% rename from tests/queries/bugs/01747_system_session_log_long.reference rename to tests/queries/0_stateless/01747_system_session_log_long.reference index 9ecf7e05421..e4f0b6f6076 100644 --- 
a/tests/queries/bugs/01747_system_session_log_long.reference +++ b/tests/queries/0_stateless/01747_system_session_log_long.reference @@ -4,215 +4,291 @@ TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - No profiles no roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - Two profiles, no roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - Two profiles and two simple roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # plaintext_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # plaintext_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # plaintext_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # sha256_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # sha256_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # sha256_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. 
+Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # double_sha1_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password # double_sha1_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password # double_sha1_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout many 
${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL Logout 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess many 
+${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess many 
+${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 
+invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many invalid_session_log_test_xml_user TCP LoginFailure 1 -invalid_session_log_test_xml_user HTTP LoginFailure 1 +invalid_session_log_test_xml_user HTTP LoginFailure many invalid_session_log_test_xml_user MySQL LoginFailure many +invalid_session_log_test_xml_user PostgreSQL LoginFailure many session_log_test_xml_user TCP LoginSuccess 1 session_log_test_xml_user TCP Logout 1 -session_log_test_xml_user HTTP LoginSuccess 1 -session_log_test_xml_user HTTP Logout 1 +session_log_test_xml_user HTTP LoginSuccess many +session_log_test_xml_user HTTP Logout many session_log_test_xml_user MySQL LoginSuccess 1 session_log_test_xml_user MySQL Logout 1 diff --git a/tests/queries/bugs/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh similarity index 78% rename from tests/queries/bugs/01747_system_session_log_long.sh rename to tests/queries/0_stateless/01747_system_session_log_long.sh index 9b127e0b48d..c6e93f4abd7 100755 --- a/tests/queries/bugs/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash # Tags: long, no-parallel, no-fasttest -# Tag no-fasttest: Accesses CH via mysql table function (which is unavailable) ################################################################################################## # Verify that login, logout, and login failure events are properly stored in system.session_log @@ -11,9 +10,8 @@ # Using multiple protocols # * native TCP protocol with CH client # * HTTP with CURL -# * MySQL - CH server accesses itself via mysql table function, query typically fails (unrelated) -# but auth should be performed properly. -# * PostgreSQL - CH server accesses itself via postgresql table function (currently out of order). +# * MySQL - CH server accesses itself via mysql table function. +# * PostgreSQL - CH server accesses itself via postgresql table function, but can't execute query (No LOGIN SUCCESS entry). # * gRPC - not done yet # # There is way to control how many time a query (e.g. via mysql table function) is retried @@ -53,7 +51,7 @@ function reportError() function executeQuery() { - ## Execute query (provided via heredoc or herestring) and print query in case of error. 
+ # Execute query (provided via heredoc or herestring) and print query in case of error. trap 'rm -f ${TMP_QUERY_FILE}; trap - ERR RETURN' RETURN # Since we want to report with current values supplied to this function call # shellcheck disable=SC2064 @@ -82,7 +80,7 @@ trap "cleanup" EXIT function executeQueryExpectError() { cat - > "${TMP_QUERY_FILE}" - ! ${CLICKHOUSE_CLIENT} "${@}" --multiquery --queries-file "${TMP_QUERY_FILE}" 2>&1 | tee -a ${TMP_QUERY_FILE} + ! ${CLICKHOUSE_CLIENT} --multiquery --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a ${TMP_QUERY_FILE} } function createUser() @@ -121,6 +119,8 @@ function createUser() executeQuery < Date: Fri, 28 Jul 2023 07:08:11 +0000 Subject: [PATCH 106/151] use same executor for GET_PART and ATTACH_PART --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d62a1d960e6..2c2cea0af2b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3311,7 +3311,7 @@ bool StorageReplicatedMergeTree::scheduleDataProcessingJob(BackgroundJobsAssigne auto job_type = selected_entry->log_entry->type; /// Depending on entry type execute in fetches (small) pool or big merge_mutate pool - if (job_type == LogEntry::GET_PART) + if (job_type == LogEntry::GET_PART || job_type == LogEntry::ATTACH_PART) { assignee.scheduleFetchTask(std::make_shared( [this, selected_entry] () mutable From 63b05da1f2da6cee086d1154ddc670329aba667d Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Fri, 28 Jul 2023 07:23:34 +0000 Subject: [PATCH 107/151] System logs improvements --- .../settings.md | 128 ++++++++++- docs/en/operations/system-tables/index.md | 4 + .../settings.md | 212 +++++++++++++++--- docs/ru/operations/system-tables/index.md | 4 + programs/server/config.xml | 53 +++++ src/Common/SystemLogBase.cpp | 81 ++++--- src/Common/SystemLogBase.h | 43 +++- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 4 + .../IO/AsynchronousBoundedReadBuffer.cpp | 2 +- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- .../IO/CachedOnDiskWriteBufferFromFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 4 +- src/Interpreters/Context.cpp | 6 + src/Interpreters/Context.h | 3 + src/Interpreters/CrashLog.cpp | 5 +- src/Interpreters/CrashLog.h | 5 + src/Interpreters/MetricLog.cpp | 2 +- src/Interpreters/PartLog.cpp | 2 +- src/Interpreters/ProcessorsProfileLog.cpp | 7 - src/Interpreters/ProcessorsProfileLog.h | 7 +- src/Interpreters/Session.cpp | 2 +- src/Interpreters/SessionLog.cpp | 6 +- src/Interpreters/SystemLog.cpp | 92 +++++--- src/Interpreters/SystemLog.h | 17 +- src/Interpreters/TextLog.cpp | 11 +- src/Interpreters/TextLog.h | 13 +- src/Interpreters/ThreadStatusExt.cpp | 4 +- src/Interpreters/TraceCollector.cpp | 2 +- src/Interpreters/TransactionLog.cpp | 2 +- src/Interpreters/TransactionsInfoLog.cpp | 2 +- src/Loggers/Loggers.cpp | 44 +++- src/Loggers/OwnSplitChannel.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- tests/integration/parallel_skip.json | 9 +- .../test_crash_log/configs/crash_log.xml | 16 ++ tests/integration/test_crash_log/test.py | 19 +- .../test_system_flush_logs/test.py | 99 +++++++- .../test_system_logs/test_system_logs.py | 50 +++++ 40 files changed, 794 insertions(+), 178 deletions(-) create mode 100644 
tests/integration/test_crash_log/configs/crash_log.xml diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a6ae517e401..e9f0f0dae00 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -512,7 +512,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc cache local_disk /tiny_local_cache/ - 10M + 10M 1M 1 0 @@ -1592,6 +1592,10 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1695,6 +1699,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1706,6 +1718,10 @@ Use the following parameters to configure logging: part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1773,6 +1789,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1786,6 +1810,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1831,6 +1859,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size_rows, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1844,6 +1880,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1861,6 +1901,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1874,6 +1922,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1890,6 +1942,14 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1901,13 +1961,16 @@ Parameters: system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day ``` - ## trace_log {#server_configuration_parameters-trace_log} Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. @@ -1920,6 +1983,12 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1931,6 +2000,10 @@ The default server configuration file `config.xml` contains the following settin trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1945,9 +2018,18 @@ Parameters: - `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) **Example** + ```xml @@ -1955,11 +2037,53 @@ Parameters: asynchronous_insert_log
7500 toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false
``` +## crash_log {#server_configuration_parameters-crash_log} + +Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation. + +Parameters: + +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional) +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). + +The default server configuration file `config.xml` contains the following settings section: + +``` xml + + system + crash_log
+ toYYYYMM(event_date) + 7500 + 1024 + 1024 + 512 + false +
+``` + ## query_masking_rules {#query-masking-rules} Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 1b720098fc7..a46f306f677 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -47,6 +47,10 @@ An example: ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 421df3fe3eb..81a696bcfc1 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1058,6 +1058,10 @@ ClickHouse использует потоки из глобального пул metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1155,12 +1159,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. - +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ``` xml @@ -1169,6 +1180,10 @@ ClickHouse использует потоки из глобального пул part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1218,11 +1233,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1234,6 +1257,10 @@ ClickHouse использует потоки из глобального пул query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1245,11 +1272,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1261,6 +1296,10 @@ ClickHouse использует потоки из глобального пул query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1272,11 +1311,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` – имя базы данных. -- `table` – имя системной таблицы, где будут логироваться запросы. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. -- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1288,6 +1335,10 @@ ClickHouse использует потоки из глобального пул query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1297,12 +1348,20 @@ ClickHouse использует потоки из глобального пул Параметры: -- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. -- `database` — имя базы данных для хранения таблицы. -- `table` — имя таблицы, куда будут записываться текстовые сообщения. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ```xml @@ -1312,6 +1371,10 @@ ClickHouse использует потоки из глобального пул system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day @@ -1323,13 +1386,21 @@ ClickHouse использует потоки из глобального пул Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. -Parameters: +Параметры: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. По умолчанию файл настроек сервера `config.xml` содержит следующие настройки: @@ -1339,9 +1410,84 @@ Parameters: trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 ``` +## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log} + +Настройки для asynchronous_insert_log Система для логирования ассинхронных вставок. + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. + +**Пример** + +```xml + + + system + asynchronous_insert_log
+ 7500 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + +
+
+``` + +## crash_log {#server_configuration_parameters-crash_log} + +Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md). + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: true. + +**Пример** + +``` xml + + system + crash_log
+ toYYYYMM(event_date) + 7500 + 1024 + 1024 + 512 + true +
+``` + ## query_masking_rules {#query-masking-rules} Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере, diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 7ff368b1910..24f79cae212 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы" ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/programs/server/config.xml b/programs/server/config.xml index 2a7dc1e576a..153cb728bb4 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1026,6 +1026,14 @@ 7500 + + 1048576 + + 8192 + + 524288 + + false @@ -1039,6 +1047,11 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + + false @@ -1084,7 +1109,11 @@ system metric_log
7500 + 1048576 + 8192 + 524288 1000 + false @@ -1151,6 +1196,10 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false + in_order + + 2 + 4 + + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 500 + + diff --git a/tests/integration/test_zookeeper_fallback_session/test.py b/tests/integration/test_zookeeper_fallback_session/test.py new file mode 100644 index 00000000000..570eca4f0a6 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/test.py @@ -0,0 +1,101 @@ +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager + + +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) + +node1 = cluster.add_instance( + "node1", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node2 = cluster.add_instance( + "node2", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node3 = cluster.add_instance( + "node3", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for node in [node1, node2, node3]: + node.query("DROP TABLE IF EXISTS simple SYNC") + node.query( + """ + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id; + """.format( + replica=node.name + ) + ) + yield cluster + finally: + cluster.shutdown() + + +def assert_uses_zk_node(node: ClickHouseInstance, zk_node): + def check_callback(host): + return host.strip() == expected_zk_ip_addr + + expected_zk_ip_addr = node.cluster.get_instance_ip(zk_node) + + host = node.query_with_retry( + "select host from system.zookeeper_connection", check_callback=check_callback + ) + assert host.strip() == expected_zk_ip_addr + + +def test_fallback_session(started_cluster: ClickHouseCluster): + # only leave connecting to zoo3 possible + with PartitionManager() as pm: + for node in started_cluster.instances.values(): + for zk in ["zoo1", "zoo2"]: + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip(zk), + "action": "REJECT --reject-with tcp-reset", + } + ) + + for node in [node1, node2, node3]: + # all nodes will have to switch to zoo3 + assert_uses_zk_node(node, "zoo3") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(1)) + + # and replication still works + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "1", + ) + == "1\n" + ) + + # at this point network partitioning has been reverted. + # the nodes should switch to zoo1 automatically because of `in_order` load-balancing. 
+ # otherwise they would connect to a random replica + for node in [node1, node2, node3]: + assert_uses_zk_node(node, "zoo1") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(2)) + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "2", + ) + == "2\n" + ) From caef499541d2ecdc69354b7444291aa5f44f3482 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jul 2023 12:37:51 +0000 Subject: [PATCH 110/151] fix test --- tests/queries/0_stateless/02833_multiprewhere_extra_column.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql index a786de454ed..3a751294cba 100644 --- a/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql +++ b/tests/queries/0_stateless/02833_multiprewhere_extra_column.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings +-- Tags: no-parallel, no-random-settings, no-random-merge-tree-settings, no-s3-storage drop table if exists t_multi_prewhere; drop row policy if exists policy_02834 on t_multi_prewhere; From ff9e85529ad638f5717670294cfc00eb6aa19140 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 28 Jul 2023 12:20:48 +0000 Subject: [PATCH 111/151] Allow reading zero objects in CachedObjectStorage::readObjects() --- .../Cached/CachedObjectStorage.cpp | 2 - .../02731_zero_objects_in_metadata.reference | 3 ++ .../02731_zero_objects_in_metadata.sh | 37 ++++++++++--------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 3e7c4d12c42..717acb08f62 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -79,8 +79,6 @@ std::unique_ptr CachedObjectStorage::readObjects( /// NO std::optional read_hint, std::optional file_size) const { - if (objects.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Received empty list of objects to read"); return object_storage->readObjects(objects, patchSettings(read_settings), read_hint, file_size); } diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference b/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference index 125915f4f65..1898b2c543b 100644 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.reference @@ -1,3 +1,6 @@ 1 [] 1 [] [] +1 [] +1 [] +[] diff --git a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh index 690cf977d08..eef52002e36 100755 --- a/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh +++ b/tests/queries/0_stateless/02731_zero_objects_in_metadata.sh @@ -5,23 +5,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " -DROP TABLE IF EXISTS test; -CREATE TABLE test (id Int32, empty Array(Int32)) - ENGINE=MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0, disk='s3_disk'; +for DISK in s3_disk s3_cache +do + ${CLICKHOUSE_CLIENT} -n --query " + DROP TABLE IF EXISTS test; + CREATE TABLE test (id Int32, empty Array(Int32)) + ENGINE=MergeTree ORDER BY id + SETTINGS min_rows_for_wide_part=0, min_bytes_for_wide_part=0, disk='$DISK'; -INSERT INTO test (id) VALUES (1); -SELECT * FROM test; -" + INSERT INTO test (id) VALUES (1); + SELECT * FROM test; + " -${CLICKHOUSE_CLIENT} -n --query " -BACKUP TABLE test TO Disk('backups', 'test_s3_backup'); -DROP TABLE test; -RESTORE TABLE test FROM Disk('backups', 'test_s3_backup'); -" &>/dev/null + ${CLICKHOUSE_CLIENT} -n --query " + BACKUP TABLE test TO Disk('backups', 'test_s3_backup'); + DROP TABLE test; + RESTORE TABLE test FROM Disk('backups', 'test_s3_backup'); + " &>/dev/null -${CLICKHOUSE_CLIENT} -n --query " -SELECT * FROM test; -SELECT empty FROM test; -" + ${CLICKHOUSE_CLIENT} -n --query " + SELECT * FROM test; + SELECT empty FROM test; + " +done From 139033247cad342dbdf1f58428921328d5c191aa Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 28 Jul 2023 15:46:45 +0200 Subject: [PATCH 112/151] bypass a limitation of github From bf9504d95d65f23552ac5708c2d0365125269085 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 28 Jul 2023 15:49:55 +0200 Subject: [PATCH 113/151] Move UnlinkMetadataFileOperationOutcome to common header (#52710) --- src/Disks/ObjectStorages/IMetadataStorage.h | 9 ++++++++- .../MetadataStorageFromDiskTransactionOperations.h | 9 --------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 264c481ee08..6b75e157dee 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -22,7 +22,14 @@ namespace ErrorCodes } class IMetadataStorage; -struct UnlinkMetadataFileOperationOutcome; + +/// Return the result of operation to the caller. +/// It is used in `IDiskObjectStorageOperation::finalize` after metadata transaction executed to make decision on blob removal. +struct UnlinkMetadataFileOperationOutcome +{ + UInt32 num_hardlinks = std::numeric_limits::max(); +}; + using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; /// Tries to provide some "transactions" interface, which allow diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index 4662ebc3423..ccb77f6ae7b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -244,15 +244,6 @@ private: std::unique_ptr write_operation; }; -/// Return the result of operation to the caller. -/// It is used in `IDiskObjectStorageOperation::finalize` after metadata transaction executed to make decision on blob removal. 
-struct UnlinkMetadataFileOperationOutcome
-{
-    UInt32 num_hardlinks = std::numeric_limits<UInt32>::max();
-};
-
-using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
-
 struct UnlinkMetadataFileOperation final : public IMetadataOperation
 {
     const UnlinkMetadataFileOperationOutcomePtr outcome = std::make_shared<UnlinkMetadataFileOperationOutcome>();

From 7cb853bcd89746c454ac46095bd4e75b155ce1e1 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Fri, 28 Jul 2023 14:22:12 +0000
Subject: [PATCH 114/151] Fix TableFunctionNode::cloneImpl

---
 src/Analyzer/TableFunctionNode.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp
index 30644ad4ec4..e5158a06373 100644
--- a/src/Analyzer/TableFunctionNode.cpp
+++ b/src/Analyzer/TableFunctionNode.cpp
@@ -133,6 +133,7 @@ QueryTreeNodePtr TableFunctionNode::cloneImpl() const
     result->storage_snapshot = storage_snapshot;
     result->table_expression_modifiers = table_expression_modifiers;
     result->settings_changes = settings_changes;
+    result->unresolved_arguments_indexes = unresolved_arguments_indexes;

     return result;
 }

From cecc6364496333d52a3d6a7aa6bd1ba55678c2f4 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Fri, 28 Jul 2023 19:35:22 +0200
Subject: [PATCH 115/151] Update CachedObjectStorage.cpp

---
 src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
index 717acb08f62..298cffaf9b0 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@@ -16,11 +16,6 @@ namespace fs = std::filesystem;
 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
 CachedObjectStorage::CachedObjectStorage(
     ObjectStoragePtr object_storage_,
     FileCachePtr cache_,

From 2a40bc9055d446d0d9f04d25b47c96d19f8d37fe Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Fri, 28 Jul 2023 23:29:59 +0000
Subject: [PATCH 116/151] Fix: __bitSwapLastTwo was applied to non BoolMask

- ActionsDAG was missing conversions to BoolMask
- __bitWrapperFunc

---
 src/Storages/MergeTree/MergeTreeIndexSet.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
index 9c34a149128..56550ab7f3a 100644
--- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp
@@ -1,5 +1,6 @@
 #include
+#include
 #include
 #include
 #include
@@ -276,7 +277,9 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
         filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node);
         filter_actions_dag->removeUnusedActions();
+        LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "Filter actions DAG:\n{}", filter_actions_dag->dumpDAG());
         actions = std::make_shared(filter_actions_dag);
+        LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "Dump actions:\n{}", actions->dumpActions());
     }
     else
     {
@@ -290,7 +293,9 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
         traverseAST(expression_ast);
         auto syntax_analyzer_result = TreeRewriter(context).analyze(expression_ast, index_sample_block.getNamesAndTypesList());
+        LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "expression AST:\n{}", expression_ast->dumpTree());
         actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result,
context).getActions(true); + LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "Dump actions:\n{}", actions->dumpActions()); } } @@ -351,6 +356,8 @@ const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDA const ContextPtr & context, std::unordered_map & node_to_result_node) const { + // LOG_DEBUG(&Poco::Logger::get(__FUNCTION__), "Traversing node:\n{}", node.dumpDAG()); + auto result_node_it = node_to_result_node.find(&node); if (result_node_it != node_to_result_node.end()) return *result_node_it->second; @@ -457,8 +464,11 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio if (arguments_size != 1) return nullptr; + auto bit_wrapper_function = FunctionFactory::instance().get("__bitWrapperFunc", context); + const auto & bit_wrapper_func_node = result_dag->addFunction(bit_wrapper_function, {arguments[0]}, {}); + auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context); - return &result_dag->addFunction(bit_swap_last_two_function, {arguments[0]}, {}); + return &result_dag->addFunction(bit_swap_last_two_function, {&bit_wrapper_func_node}, {}); } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { From 45b96142249c02f71b140b14784969b9a228760c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 28 Jul 2023 23:41:18 +0000 Subject: [PATCH 117/151] Remove debug logging --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 56550ab7f3a..05ea7d15603 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -1,6 +1,5 @@ #include -#include #include #include #include @@ -277,9 +276,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node); filter_actions_dag->removeUnusedActions(); - LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "Filter actions DAG:\n{}", filter_actions_dag->dumpDAG()); actions = std::make_shared(filter_actions_dag); - LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "Dump actions:\n{}", actions->dumpActions()); } else { @@ -293,9 +290,7 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( traverseAST(expression_ast); auto syntax_analyzer_result = TreeRewriter(context).analyze(expression_ast, index_sample_block.getNamesAndTypesList()); - LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "expression AST:\n{}", expression_ast->dumpTree()); actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); - LOG_DEBUG(&Poco::Logger::get("MergeTreeIndexConditionSet"), "Dump actions:\n{}", actions->dumpActions()); } } @@ -356,8 +351,6 @@ const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDA const ContextPtr & context, std::unordered_map & node_to_result_node) const { - // LOG_DEBUG(&Poco::Logger::get(__FUNCTION__), "Traversing node:\n{}", node.dumpDAG()); - auto result_node_it = node_to_result_node.find(&node); if (result_node_it != node_to_result_node.end()) return *result_node_it->second; From a6fc632aa18f45ad141d3f524bf1492d0450060a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 28 Jul 2023 23:57:56 +0000 Subject: [PATCH 118/151] More test queries + update file with broken tests --- tests/analyzer_tech_debt.txt | 1 
-
 tests/queries/0_stateless/00979_set_index_not.reference |  2 ++
 tests/queries/0_stateless/00979_set_index_not.sql       |  2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index 737e0e0a5e4..c8f2bb9f43d 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -11,7 +11,6 @@
 00927_asof_joins
 00940_order_by_read_in_order_query_plan
 00945_bloom_filter_index
-00979_set_index_not
 00981_in_subquery_with_tuple
 01049_join_low_card_bug_long
 01062_pm_all_join_with_block_continuation
diff --git a/tests/queries/0_stateless/00979_set_index_not.reference b/tests/queries/0_stateless/00979_set_index_not.reference
index 455708dfe99..033fcb0467b 100644
--- a/tests/queries/0_stateless/00979_set_index_not.reference
+++ b/tests/queries/0_stateless/00979_set_index_not.reference
@@ -1,2 +1,4 @@
 Jon alive
 Jon alive
+Ramsey rip
+Ramsey rip
diff --git a/tests/queries/0_stateless/00979_set_index_not.sql b/tests/queries/0_stateless/00979_set_index_not.sql
index 2ad27e337f9..13a0b4cbb09 100644
--- a/tests/queries/0_stateless/00979_set_index_not.sql
+++ b/tests/queries/0_stateless/00979_set_index_not.sql
@@ -11,5 +11,7 @@ insert into set_index_not values ('Jon','alive'),('Ramsey','rip');
 select * from set_index_not where status!='rip';
 select * from set_index_not where NOT (status ='rip');
+select * from set_index_not where NOT (status!='rip');
+select * from set_index_not where NOT (NOT (status ='rip'));
 
 DROP TABLE set_index_not;

From 553304a81ac0063f4d60d178ef9b1763fd2b083c Mon Sep 17 00:00:00 2001
From: Smita Kulkarni
Date: Sat, 29 Jul 2023 11:15:56 +0200
Subject: [PATCH 119/151] Fixed clang tidy build by removing unused variable

---
 src/Storages/StorageSnapshot.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp
index 6abca59268f..c0e85900794 100644
--- a/src/Storages/StorageSnapshot.cpp
+++ b/src/Storages/StorageSnapshot.cpp
@@ -120,9 +120,8 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) cons
     const auto & columns = getMetadataForQuery()->getColumns();
     for (const auto & column_name : column_names)
     {
-        std::string substituted_column_name = column_name;
-        auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name);
-        auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name);
+        auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, column_name);
+        auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, column_name);
         if (column && !object_column)
         {
             res.insert({column->type->createColumn(), column->type, column_name});
@@ -141,7 +140,7 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) cons
         else
         {
             throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK,
-                "Column {} not found in table {}", backQuote(substituted_column_name), storage.getStorageID().getNameForLogs());
+                "Column {} not found in table {}", backQuote(column_name), storage.getStorageID().getNameForLogs());
         }
     }
     return res;

From 84ab4238426b96fb126be8c59055657ed104c1c2 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 28 Jul 2023 18:18:38 +0200
Subject: [PATCH 120/151] Allow reading empty file with no blobs

---
 src/Disks/ObjectStorages/DiskObjectStorage.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git 
a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 762151b3808..6bb6982065a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -485,8 +486,15 @@ std::unique_ptr DiskObjectStorage::readFile( std::optional read_hint, std::optional file_size) const { + auto storage_objects = metadata_storage->getStorageObjects(path); + + const bool file_can_be_empty = !file_size.has_value() || *file_size == 0; + + if (storage_objects.empty() && file_can_be_empty) + return std::make_unique(); + return object_storage->readObjects( - metadata_storage->getStorageObjects(path), + storage_objects, object_storage->getAdjustedSettingsFromMetadataFile(settings, path), read_hint, file_size); From cbb546a1a97b4d9819efa82547b48103a40d575d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 00:18:08 +0300 Subject: [PATCH 121/151] Update easy_tasks_sorted_ru.md --- tests/instructions/easy_tasks_sorted_ru.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/instructions/easy_tasks_sorted_ru.md b/tests/instructions/easy_tasks_sorted_ru.md index 09ea48d0bd9..17e9708eef5 100644 --- a/tests/instructions/easy_tasks_sorted_ru.md +++ b/tests/instructions/easy_tasks_sorted_ru.md @@ -129,7 +129,7 @@ position с конца строки. Атомарно удаляет таблицу перед созданием новой, если такая была. -## * Приведение типов для IN (subquery). +## + Приведение типов для IN (subquery). `SELECT 1 IN (SELECT -1 UNION ALL SELECT 1)` @@ -205,12 +205,12 @@ https://clickhouse.com/docs/en/operations/table_engines/external_data/ ## Возможность задавать параметры соединений для табличных функций, движков таблиц и для реплик из отдельных разделов конфигурации. -## Настройка rollup_use_nulls. +## + Настройка rollup_use_nulls. + +Upd: it is named "group_by_use_nulls". ## + Настройка cast_keep_nullable. -## Функция bitEquals для сравнения произвольных типов данных побитово. +## Функция bitEquals для сравнения произвольных типов данных побитово ## Функция serialize для implementation specific non portable non backwards compatible сериализации любого типа данных в набор байт. - -## Функция bitEquals и оператор <=>. 
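The `readFile()` guard in the DiskObjectStorage patch above is easy to model in isolation. The following is a minimal, self-contained sketch of the decision it makes; `StoredObject` is reduced to a stub and the concrete empty-buffer type returned in the real code is not shown here, so treat the names as illustrative rather than the actual ClickHouse API.

``` cpp
#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

// Stub standing in for ClickHouse's blob descriptor type.
struct StoredObject {};

// Mirrors the new check: a path whose metadata lists no blobs may be served
// as an empty file, but only if the caller passed no size hint or a hint of 0.
bool can_serve_as_empty_file(const std::vector<StoredObject> & storage_objects,
                             std::optional<uint64_t> file_size)
{
    const bool file_can_be_empty = !file_size.has_value() || *file_size == 0;
    return storage_objects.empty() && file_can_be_empty;
}

int main()
{
    assert(can_serve_as_empty_file({}, std::nullopt));       // unknown size: ok
    assert(can_serve_as_empty_file({}, 0));                  // expected empty: ok
    assert(!can_serve_as_empty_file({}, 42));                // caller expects data
    assert(!can_serve_as_empty_file({StoredObject{}}, 0));   // blobs exist: read them
    return 0;
}
```

The key design point is that the fast path is taken only when both conditions hold; a non-empty size hint with missing blobs still falls through to the object storage read, which surfaces the inconsistency instead of silently returning empty data.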
From 63b155a74111e7f2b1b94808e8f9aa47374bc71f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 29 Jul 2023 23:12:54 +0000 Subject: [PATCH 122/151] bypass merging attempt for non 'clickhouse' configurations with different root node names --- src/Common/Config/ConfigProcessor.cpp | 13 +++++++++++-- src/Common/Config/ConfigProcessor.h | 4 +++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index a55183782d8..04f55600b40 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -328,7 +328,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } } -void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with) +bool ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with) { Node * config_root = getRootNode(config.get()); Node * with_root = getRootNode(with.get()); @@ -343,11 +343,15 @@ void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with) && !((config_root_node_name == "yandex" || config_root_node_name == "clickhouse") && (merged_root_node_name == "yandex" || merged_root_node_name == "clickhouse"))) { + if (config_root_node_name != "clickhouse" && config_root_node_name != "yandex") + return false; + throw Poco::Exception("Root element doesn't have the corresponding root element as the config file." " It must be <" + config_root->nodeName() + ">"); } mergeRecursive(config, config_root, with_root); + return true; } void ConfigProcessor::doIncludesRecursive( @@ -645,7 +649,12 @@ XMLDocumentPtr ConfigProcessor::processConfig( with = dom_parser.parse(merge_file); } - merge(config, with); + if (!merge(config, with)) + { + LOG_DEBUG(log, "Merging bypassed - configuration file '{}' doesn't belong to configuration '{}' - merging root node name '{}' doesn't match '{}'", + merge_file, path, getRootNode(with.get())->nodeName(), getRootNode(config.get())->nodeName()); + continue; + } contributing_files.push_back(merge_file); } diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 060ef49d36a..b4f85b10526 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -144,7 +144,9 @@ private: void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); - void merge(XMLDocumentPtr config, XMLDocumentPtr with); + /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. + /// For compatibility root node 'yandex' considered equal to 'clickhouse'. 
+    bool merge(XMLDocumentPtr config, XMLDocumentPtr with);

     void doIncludesRecursive(
         XMLDocumentPtr config,

From 4d27cf4386e1b3b3d0b1516723fbef0f9143ebcc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 30 Jul 2023 02:11:32 +0200
Subject: [PATCH 123/151] Proper stripping

---
 cmake/split_debug_symbols.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/split_debug_symbols.cmake b/cmake/split_debug_symbols.cmake
index 8ba7669d9e2..d6821eb6c48 100644
--- a/cmake/split_debug_symbols.cmake
+++ b/cmake/split_debug_symbols.cmake
@@ -24,7 +24,7 @@ macro(clickhouse_split_debug_symbols)
     COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
     # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
     # Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
-    COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash --keep-section=.symtab --keep-section=.strtab --keep-section=.shstrtab "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+    COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
     # Associate stripped binary with debug symbols:
     COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
     COMMENT "Stripping clickhouse binary" VERBATIM

From a4f2eea51603bb98340e8c6add215ca8b7fc676d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 30 Jul 2023 02:19:39 +0200
Subject: [PATCH 124/151] Maybe better

---
 utils/check-style/check-style | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index 67c185a0b54..b728602ef40 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -12,6 +12,7 @@
 # (simple regexps) to check if the code is likely to have basic style violations.
 # and then to run formatter only for the specified files.
+LC_ALL="en_US.UTF-8" ROOT_PATH=$(git rev-parse --show-toplevel) EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' From 1deebd3ae624232e2b9ae05e7b1350816ef490ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 04:22:36 +0200 Subject: [PATCH 125/151] Fix Docker --- docker/test/base/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index f6836804454..bfbe9461c1e 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update \ pv \ ripgrep \ zstd \ + locales \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) @@ -28,7 +29,13 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + ENV TZ=Europe/Moscow RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone CMD sleep 1 + + +RUN apt-get update && apt-get install -y locales From a336f06f57542015ba63043a4bdf7261ee1f0ac3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 05:46:05 +0300 Subject: [PATCH 126/151] Update Context.cpp --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 123c2ab8f85..8b8333524b8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1581,7 +1581,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const { auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); - StorageView::replaceQueryParametersIfParametrizedView(query,parameterized_view_values); + StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); ASTCreateQuery create; create.select = query->as(); From 972653cde3045d309c2bff180630cedefa9b10b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 05:18:52 +0200 Subject: [PATCH 127/151] Change the default timezones in Docker test images --- docker/test/base/Dockerfile | 2 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/Dockerfile | 2 +- docker/test/performance-comparison/Dockerfile | 2 +- docker/test/stateless/Dockerfile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index f6836804454..17aed0d4402 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -28,7 +28,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_de ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone CMD sleep 1 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index da4baa8c687..ad24e662a6c 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -32,7 +32,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf 
/tmp/clickhouse-odbc-tmp

-ENV TZ=Europe/Moscow
+ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 ENV COMMIT_SHA=''
diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile
index aa71074c02a..0bc0fb06633 100644
--- a/docker/test/fuzzer/Dockerfile
+++ b/docker/test/fuzzer/Dockerfile
@@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 ENV LANG=C.UTF-8
-ENV TZ=Europe/Moscow
+ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 RUN apt-get update \
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index ab9f1f8a2e3..cfd7c613868 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -11,7 +11,7 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

 ENV LANG=C.UTF-8
-ENV TZ=Europe/Moscow
+ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 RUN apt-get update \
diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index e1e84c427ba..6f44c7cc88b 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -52,7 +52,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
     && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
     && rm -rf /tmp/clickhouse-odbc-tmp

-ENV TZ=Europe/Moscow
+ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 ENV NUM_TRIES=1

From ecfde7edc6ad3292739ada60ec36c8f9413f9ffe Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 30 Jul 2023 05:20:51 +0200
Subject: [PATCH 128/151] Fix Docker

---
 docker/test/style/Dockerfile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index 2aa0b1a62d6..bd1c26855b7 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -18,9 +18,13 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
     python3-pip \
     shellcheck \
     yamllint \
+    locales \
     && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
     && apt-get clean \
-    && rm -rf /root/.cache/pip
+    && rm -rf /root/.cache/pip
+
+RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
+ENV LC_ALL en_US.UTF-8

 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH

From 424fe43090fce65d564997452a85e5d617271b52 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 30 Jul 2023 06:57:11 +0300
Subject: [PATCH 129/151] Update Dockerfile

---
 docker/test/base/Dockerfile | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile
index bfbe9461c1e..6ba44662af9 100644
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@@ -36,6 +36,3 @@ ENV TZ=Europe/Moscow
 RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone

 CMD sleep 1
-
-
-RUN apt-get update && apt-get install -y locales

From 85387912ee3c1f5345b8c5b95181e1325124eb6d Mon Sep 17 00:00:00 2001
From: xiebin
Date: Sun, 30 Jul 2023 12:06:30 +0800
Subject: [PATCH 130/151] Remove the first bad query, because a simple layout
 with two keys is now automatically converted to a complex-key layout.
--- .../01018_ddl_dictionaries_bad_queries.reference | 1 - .../01018_ddl_dictionaries_bad_queries.sh | 15 --------------- 2 files changed, 16 deletions(-) diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference index abc3218ce6c..5ee4e7592f6 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.reference @@ -4,4 +4,3 @@ 2 2 2 -2 diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh index 6826ed677db..8a40129ad4f 100755 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_bad_queries.sh @@ -9,21 +9,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP DICTIONARY IF EXISTS dict1" -# Simple layout, but with two keys -$CLICKHOUSE_CLIENT -q " - CREATE DICTIONARY dict1 - ( - key1 UInt64, - key2 UInt64, - value String - ) - PRIMARY KEY key1, key2 - LAYOUT(HASHED()) - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict1' DB '$CLICKHOUSE_DATABASE')) - LIFETIME(MIN 1 MAX 10) -" 2>&1 | grep -c 'Primary key for simple dictionary must contain exactly one element' - - # Simple layout, but with non existing key $CLICKHOUSE_CLIENT -q " CREATE DICTIONARY dict1 From 41ffc097711b9ce3c3a26ba0fdc38315e3a39363 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 09:18:06 +0200 Subject: [PATCH 131/151] Upload build statistics to the CI database --- tests/ci/build_check.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 35b98a7c3bb..9e44af5296c 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -35,6 +35,11 @@ from version_helper import ( get_version_from_repo, update_version_local, ) +from clickhouse_helper import ( + ClickHouseHelper, + prepare_tests_results_for_clickhouse, +) +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" @@ -268,6 +273,7 @@ def mark_failed_reports_pending(build_name: str, pr_info: PRInfo) -> None: def main(): logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() build_name = sys.argv[1] build_config = CI_CONFIG["build_config"][build_name] @@ -394,7 +400,20 @@ def main(): ) upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) - # Fail build job if not successeded + + ch_helper = ClickHouseHelper() + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [], + "success" if success else "failure", + stopwatch.duration_seconds, + stopwatch.start_time_str, + log_url, + f"Build ({build_name})", + ) + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + # Fail the build job if it didn't succeed if not success: sys.exit(1) From 448756d14872cc89cc08aedf80e214941d1fdd4e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 09:31:21 +0200 Subject: [PATCH 132/151] Add machine_type information to the CI database --- tests/ci/clickhouse_helper.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 9410b37d69f..eca5f6f399d 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -132,6 
+132,21 @@ class ClickHouseHelper: return result +# Obtain the machine type from IMDS: +def get_instance_type(): + url = "http://169.254.169.254/latest/meta-data/instance-type" + for i in range(5): + try: + response = requests.get(url, timeout=1) + if response.status_code == 200: + return response.text + except Exception as e: + error = f"Received exception while sending data to {url} on {i} attempt: {e}" + logging.warning(error) + continue + return '' + + def prepare_tests_results_for_clickhouse( pr_info: PRInfo, test_results: TestResults, @@ -168,6 +183,7 @@ def prepare_tests_results_for_clickhouse( head_ref=head_ref, head_repo=head_repo, task_url=pr_info.task_url, + instance_type=get_instance_type(), ) # Always publish a total record for all checks. For checks with individual From 358773c620ad091458c63de1e37307a053f044c9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 30 Jul 2023 07:45:27 +0000 Subject: [PATCH 133/151] Automatic style fix --- tests/ci/clickhouse_helper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index eca5f6f399d..7f98f6e52d2 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -141,10 +141,12 @@ def get_instance_type(): if response.status_code == 200: return response.text except Exception as e: - error = f"Received exception while sending data to {url} on {i} attempt: {e}" + error = ( + f"Received exception while sending data to {url} on {i} attempt: {e}" + ) logging.warning(error) continue - return '' + return "" def prepare_tests_results_for_clickhouse( From bc1f80dd82d2992aa108f323af4349bd2f4c9968 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 10:25:44 +0200 Subject: [PATCH 134/151] Remove Coverity (part 2) --- docker/packager/binary/build.sh | 17 ----------------- docker/packager/packager | 7 +------ tests/ci/ci_config.py | 11 ----------- 3 files changed, 1 insertion(+), 34 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c74147..ce3f729fe33 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -80,16 +80,6 @@ else cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - mkdir -p /workdir/cov-analysis - - wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1 - export PATH=$PATH:/workdir/cov-analysis/bin - cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" - SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" -fi - # No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. $SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET @@ -175,13 +165,6 @@ then mv "$COMBINED_OUTPUT.tar.zst" /output fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - # Coverity does not understand ZSTD. 
- tar -cvz -f "coverity-scan.tar.gz" cov-int - mv "coverity-scan.tar.gz" /output -fi - ccache_status ccache --evict-older-than 1d diff --git a/docker/packager/packager b/docker/packager/packager index e12bd55dde3..399e12e6680 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -253,11 +253,6 @@ def parse_env_variables( cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") - # Create combined output archive for performance tests. - if package_type == "coverity": - result.append("COMBINED_OUTPUT=coverity") - result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') - if sanitizer: result.append(f"SANITIZER={sanitizer}") if build_type: @@ -356,7 +351,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--package-type", - choices=["deb", "binary", "coverity"], + choices=["deb", "binary"], required=True, ) parser.add_argument( diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index db9a7f926be..bea654ca76f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -19,16 +19,6 @@ CI_CONFIG = { "with_coverage": False, "comment": "", }, - "coverity": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "coverity", - "tidy": "disable", - "with_coverage": False, - "official": False, - "comment": "A special build for coverity", - }, "package_aarch64": { "compiler": "clang-16-aarch64", "build_type": "", @@ -187,7 +177,6 @@ CI_CONFIG = { "builds_report_config": { "ClickHouse build check": [ "package_release", - "coverity", "package_aarch64", "package_asan", "package_ubsan", From 6bff19bd824bb38d026b12b000a76e7b47c098f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jul 2023 10:27:33 +0200 Subject: [PATCH 135/151] Remove Coverity (part 2) --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index ce3f729fe33..640654e5445 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -82,7 +82,7 @@ fi # No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET +ninja $NINJA_FLAGS $BUILD_TARGET ls -la ./programs From 08ef4d015c6b42061f6de467279f0f1207913435 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 30 Jul 2023 13:36:52 +0200 Subject: [PATCH 136/151] Minor change --- docs/en/operations/system-tables/events.md | 2 +- docs/en/operations/system-tables/metrics.md | 2 +- src/Storages/System/StorageSystemEvents.h | 1 + src/Storages/System/StorageSystemMetrics.h | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index b53628a8384..26f5cb2031b 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -10,7 +10,7 @@ Columns: - `event` ([String](../../sql-reference/data-types/string.md)) — Event name. - `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. - `description` ([String](../../sql-reference/data-types/string.md)) — Event description. -- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for event. +- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`. 
**Example** diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index ea36411cf3d..6771e8d7aad 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -10,7 +10,7 @@ Columns: - `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. - `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. -- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for metric. +- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`. The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index ed80a441529..b2e4bac072a 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -18,6 +18,7 @@ public: static NamesAndTypesList getNamesAndTypes(); static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index bffc7e6f1fc..e3e2c07014f 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -19,6 +19,7 @@ public: static NamesAndTypesList getNamesAndTypes(); static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; From aba291ca688fa58d82efc329e7be6d1c2e638f4a Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 30 Jul 2023 14:01:34 +0200 Subject: [PATCH 137/151] Revert "Revert "Add an ability to specify allocations size for sampling memory profiler"" --- programs/server/Server.cpp | 21 ++++++++--- src/Common/MemoryTracker.cpp | 10 ++++- src/Common/MemoryTracker.h | 18 +++++++++ src/Core/ServerSettings.h | 8 +++- src/Core/Settings.h | 4 +- src/Interpreters/ProcessList.cpp | 3 ++ src/Interpreters/ThreadStatusExt.cpp | 2 + .../__init__.py | 1 + .../configs/max_untracked_memory.xml | 7 ++++ .../configs/memory_profiler.xml | 5 +++ .../test.py | 37 +++++++++++++++++++ ...r_sample_min_max_allocation_size.reference | 1 + ...profiler_sample_min_max_allocation_size.sh | 18 +++++++++ 13 files changed, 124 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml create mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py create mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference create mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 405ebf7fb2f..85ee48ea075 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1691,17 +1691,26 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). 
- UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); - if (total_memory_profiler_step) + if (server_settings.total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(total_memory_profiler_step); + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); } - double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); - if (total_memory_tracker_sample_probability > 0.0) + if (server_settings.total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); } + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + { + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + } + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + { + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 81cac2617c5..52cae0768dc 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); @@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size) } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); @@ -534,6 +534,12 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) ; } +bool MemoryTracker::isSizeOkForSampling(UInt64 size) const +{ + /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation + return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); +} + bool canEnqueueBackgroundTask() { auto limit = background_memory_tracker.getSoftLimit(); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 3ea1ea8702c..6f576e53e7a 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -67,6 +67,12 @@ private: /// To randomly sample allocations and deallocations in trace_log. double sample_probability = 0; + /// Randomly sample allocations only larger or equal to this size + UInt64 min_allocation_size_bytes = 0; + + /// Randomly sample allocations only smaller or equal to this size + UInt64 max_allocation_size_bytes = 0; + /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. 
std::atomic parent {}; @@ -88,6 +94,8 @@ private: void setOrRaiseProfilerLimit(Int64 value); + bool isSizeOkForSampling(UInt64 size) const; + /// allocImpl(...) and free(...) should not be used directly friend struct CurrentMemoryTracker; void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); @@ -166,6 +174,16 @@ public: sample_probability = value; } + void setSampleMinAllocationSize(UInt64 value) + { + min_allocation_size_bytes = value; + } + + void setSampleMaxAllocationSize(UInt64 value) + { + max_allocation_size_bytes = value; + } + void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index ca27cbdbf19..7678e8c3f24 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -83,8 +83,12 @@ namespace DB M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) - + M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ + \ + M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ + M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c69d132ea25..26e0808aaa3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -427,7 +427,9 @@ class IColumn; M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. 
This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ - M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. 
If timeout is reached and memory is not freed, exception is thrown.", 0) \ diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 1503e396298..c299572ef41 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); + thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 7a6bc45c118..1b39bb6c0dc 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,6 +83,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml new file mode 100644 index 00000000000..56fc5ed34ca --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml new file mode 100644 index 00000000000..5b3e17d145f --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml @@ -0,0 +1,5 @@ + + 1 + 4096 + 8192 + diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py new file mode 100644 index 00000000000..6ab971fa9c4 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -0,0 +1,37 @@ +from helpers.cluster import ClickHouseCluster +import pytest + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/memory_profiler.xml"], + user_configs=["configs/max_untracked_memory.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield 
cluster + + finally: + cluster.shutdown() + + +def test_trace_boundaries_work(started_cluster): + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") + node.query("SYSTEM FLUSH LOGS") + + assert ( + node.query( + "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" + ) + == "1\n" + ) + assert ( + node.query( + "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" + ) + == "0\n" + ) diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh new file mode 100755 index 00000000000..b1fbea26da7 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# requires TraceCollector, does not available under sanitizers and aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" +${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + +# at least something allocated +${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" + +# show wrong allocations +${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" From 3e5767853d824034221b981ef7c3ad29f0e0ce77 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 30 Jul 2023 14:15:17 +0200 Subject: [PATCH 138/151] Disable test for sanitizers --- tests/integration/test_memory_profiler_min_max_borders/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py index 6ab971fa9c4..df9439bc2bb 100644 --- a/tests/integration/test_memory_profiler_min_max_borders/test.py +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -20,6 +20,9 @@ def started_cluster(): def test_trace_boundaries_work(started_cluster): + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") node.query("SYSTEM FLUSH LOGS") From e58b3cfd3246681092908dc6e38d0b9a130b5df7 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Sun, 30 Jul 2023 22:09:03 +0000 Subject: [PATCH 139/151] Implemented max sessions for user --- .../operations/settings/query-complexity.md | 36 +++ .../operations/settings/settings-profiles.md 
| 4 +- .../operations/settings/query-complexity.md | 37 +++ .../operations/settings/settings-profiles.md | 3 +- src/Access/ContextAccess.cpp | 32 ++- src/Access/SettingsConstraints.cpp | 81 +++++-- src/Access/SettingsConstraints.h | 23 +- src/Client/Suggest.cpp | 3 + src/Common/ErrorCodes.cpp | 1 + src/Common/SettingSource.h | 43 ++++ src/Core/Settings.h | 2 + .../Access/InterpreterCreateRoleQuery.cpp | 2 +- .../InterpreterCreateSettingsProfileQuery.cpp | 2 +- .../Access/InterpreterCreateUserQuery.cpp | 2 +- src/Interpreters/Context.cpp | 37 ++- src/Interpreters/Context.h | 17 +- src/Interpreters/InterpreterSetQuery.cpp | 4 +- src/Interpreters/Session.cpp | 39 ++- src/Interpreters/Session.h | 3 + src/Interpreters/SessionTracker.cpp | 62 +++++ src/Interpreters/SessionTracker.h | 60 +++++ src/Server/GRPCServer.cpp | 4 +- src/Server/HTTPHandler.cpp | 2 +- src/Server/TCPHandler.cpp | 21 +- tests/integration/parallel_skip.json | 12 +- .../__init__.py | 0 .../configs/dhparam.pem | 8 + .../configs/log.xml | 9 + .../configs/ports.xml | 9 + .../configs/server.crt | 18 ++ .../configs/server.key | 28 +++ .../configs/ssl_conf.xml | 17 ++ .../configs/users.xml | 16 ++ .../protos/clickhouse_grpc.proto | 1 + .../test.py | 222 ++++++++++++++++++ ...2832_alter_max_sessions_for_user.reference | 12 + .../02832_alter_max_sessions_for_user.sh | 64 +++++ 37 files changed, 851 insertions(+), 85 deletions(-) create mode 100644 src/Common/SettingSource.h create mode 100644 src/Interpreters/SessionTracker.cpp create mode 100644 src/Interpreters/SessionTracker.h create mode 100755 tests/integration/test_profile_max_sessions_for_user/__init__.py create mode 100755 tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem create mode 100644 tests/integration/test_profile_max_sessions_for_user/configs/log.xml create mode 100644 tests/integration/test_profile_max_sessions_for_user/configs/ports.xml create mode 100755 tests/integration/test_profile_max_sessions_for_user/configs/server.crt create mode 100755 tests/integration/test_profile_max_sessions_for_user/configs/server.key create mode 100644 tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml create mode 100644 tests/integration/test_profile_max_sessions_for_user/configs/users.xml create mode 120000 tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto create mode 100755 tests/integration/test_profile_max_sessions_for_user/test.py create mode 100644 tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference create mode 100755 tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 163ed5d5826..3af368b87a7 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -327,3 +327,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all Zero means unlimited. Default value: 0. + +## max_sessions_for_user {#max-sessions-per-user} + +Maximum number of simultaneous sessions per authenticated user to the ClickHouse server. + +Example: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_user + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Default value: 0 (Infinite count of simultaneous sessions). 
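The new `max_sessions_for_user` limit documented above needs per-user accounting of live sessions. The sketch below models that bookkeeping in a self-contained way; the real implementation is the `SessionTracker` added by this patch, and the class and method names here are illustrative assumptions rather than its actual API.

``` cpp
#include <cstddef>
#include <map>
#include <mutex>
#include <stdexcept>
#include <string>

// Minimal sketch of per-user session accounting; the real logic lives in the
// new src/Interpreters/SessionTracker.*, and the names below are illustrative.
class SessionCounter
{
    std::mutex mutex;
    std::map<std::string, size_t> sessions_per_user;

public:
    // max_sessions == 0 means unlimited, matching the documented default.
    void signIn(const std::string & user, size_t max_sessions)
    {
        std::lock_guard<std::mutex> lock(mutex);
        size_t & count = sessions_per_user[user];
        if (max_sessions != 0 && count >= max_sessions)
            throw std::runtime_error("USER_SESSION_LIMIT_EXCEEDED: " + user);
        ++count;
    }

    void signOut(const std::string & user)
    {
        std::lock_guard<std::mutex> lock(mutex);
        auto it = sessions_per_user.find(user);
        if (it != sessions_per_user.end() && it->second > 0)
            --it->second;
    }
};
```

Note that the decrement must run on every session teardown path, including exceptional ones, or the counter leaks and eventually locks the user out; this is presumably why the diffstat shows `Session.cpp`/`Session.h` being modified rather than only a tracker being added.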
diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 2f39a75453c..d08266b8ef8 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ Example: 8
- + 1000000000 100000000000 @@ -67,6 +67,8 @@ Example: 50 100 + 4 + 1 diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index de9bb969085..5f3f4b74f51 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -314,3 +314,40 @@ FORMAT Null; При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом: > «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).» + +## max_sessions_for_user {#max-sessions-per-user} + +Максимальное количество одновременных сессий на одного аутентифицированного пользователя. + +Пример: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_profile + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Значение по умолчанию: 0 (неограниченное количество сессий). diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md index ba2cb9a601f..0d094c637ac 100644 --- a/docs/ru/operations/settings/settings-profiles.md +++ b/docs/ru/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ SET profile = 'web' 8 - + 1000000000 100000000000 @@ -67,6 +67,7 @@ SET profile = 'web' 50 100 + 4 1 diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 9e9d8644539..e277c49e39d 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -328,9 +328,6 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & enabled_row_policies = access_control->getEnabledRowPolicies(*params.user_id, roles_info->enabled_roles); - enabled_quota = access_control->getEnabledQuota( - *params.user_id, user_name, roles_info->enabled_roles, params.address, params.forwarded_address, params.quota_key); - enabled_settings = access_control->getEnabledSettings( *params.user_id, user->settings, roles_info->enabled_roles, roles_info->settings_from_enabled_roles); @@ -416,19 +413,32 @@ RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, co std::shared_ptr ContextAccess::getQuota() const { std::lock_guard lock{mutex}; - if (enabled_quota) - return enabled_quota; - static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota(); - return unlimited_quota; + + if (!enabled_quota) + { + if (roles_info) + { + enabled_quota = access_control->getEnabledQuota(*params.user_id, + user_name, + roles_info->enabled_roles, + params.address, + params.forwarded_address, + params.quota_key); + } + else + { + static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota(); + return unlimited_quota; + } + } + + return enabled_quota; } std::optional ContextAccess::getQuotaUsage() const { - std::lock_guard lock{mutex}; - if (enabled_quota) - return enabled_quota->getUsage(); - return {}; + return getQuota()->getUsage(); } diff --git a/src/Access/SettingsConstraints.cpp 
b/src/Access/SettingsConstraints.cpp index 12f584cab83..74c6bbe0353 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +22,39 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } +namespace +{ +struct SettingSourceRestrictions +{ + constexpr SettingSourceRestrictions() { allowed_sources.set(); } + + constexpr SettingSourceRestrictions(std::initializer_list<SettingSource> allowed_sources_) + { + for (auto allowed_source : allowed_sources_) + setSourceAllowed(allowed_source, true); + } + + constexpr bool isSourceAllowed(SettingSource source) { return allowed_sources[source]; } + constexpr void setSourceAllowed(SettingSource source, bool allowed) { allowed_sources[source] = allowed; } + + std::bitset<SettingSource::USER + 1> allowed_sources; +}; + +const std::unordered_map<std::string_view, SettingSourceRestrictions> SETTINGS_SOURCE_RESTRICTIONS = { + {"max_sessions_for_user", {SettingSource::PROFILE}}, +}; + +SettingSourceRestrictions getSettingSourceRestrictions(std::string_view name) +{ + auto settingConstraintIter = SETTINGS_SOURCE_RESTRICTIONS.find(name); + if (settingConstraintIter != SETTINGS_SOURCE_RESTRICTIONS.end()) + return settingConstraintIter->second; + else + return SettingSourceRestrictions(); // allows everything +} + +} + SettingsConstraints::SettingsConstraints(const AccessControl & access_control_) : access_control(&access_control_) { } @@ -98,7 +133,7 @@ } -void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const +void SettingsConstraints::check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const { for (const auto & element : profile_elements) { @@ -108,19 +143,19 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting if (element.value) { SettingChange value(element.setting_name, *element.value); - check(current_settings, value); + check(current_settings, value, source); } if (element.min_value) { SettingChange value(element.setting_name, *element.min_value); - check(current_settings, value); + check(current_settings, value, source); } if (element.max_value) { SettingChange value(element.setting_name, *element.max_value); - check(current_settings, value); + check(current_settings, value, source); } SettingConstraintWritability new_value = SettingConstraintWritability::WRITABLE; @@ -142,24 +177,24 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting } } -void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change) const +void SettingsConstraints::check(const Settings & current_settings, const SettingChange & change, SettingSource source) const { - checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION); + checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION, source); } -void SettingsConstraints::check(const Settings & current_settings, const SettingsChanges & changes) const +void SettingsConstraints::check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const { for (const auto & change : changes) - check(current_settings, change); + check(current_settings, change, source); } -void SettingsConstraints::check(const Settings & current_settings, SettingsChanges & changes) const +void SettingsConstraints::check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const { boost::range::remove_erase_if( changes, [&](SettingChange & change) -> bool { - return !checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION); + return !checkImpl(current_settings, const_cast<SettingChange &>(change), THROW_ON_VIOLATION, source); }); } @@ -174,13 +209,13 @@ void SettingsConstraints::check(const MergeTreeSettings & current_settings, cons check(current_settings, change); } -void SettingsConstraints::clamp(const Settings & current_settings, SettingsChanges & changes) const +void SettingsConstraints::clamp(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const { boost::range::remove_erase_if( changes, [&](SettingChange & change) -> bool { - return !checkImpl(current_settings, change, CLAMP_ON_VIOLATION); + return !checkImpl(current_settings, change, CLAMP_ON_VIOLATION, source); }); } @@ -215,7 +250,10 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel return true; } -bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const +bool SettingsConstraints::checkImpl(const Settings & current_settings, + SettingChange & change, + ReactionOnViolation reaction, + SettingSource source) const { std::string_view setting_name = Settings::Traits::resolveName(change.name); @@ -247,7 +285,7 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh if (!getNewValueToCheck(current_settings, change, new_value, reaction == THROW_ON_VIOLATION)) return false; - return getChecker(current_settings, setting_name).check(change, new_value, reaction); + return getChecker(current_settings, setting_name).check(change, new_value, reaction, source); } bool SettingsConstraints::checkImpl(const MergeTreeSettings & current_settings, SettingChange & change, ReactionOnViolation reaction) const @@ -255,10 +293,13 @@ bool SettingsConstraints::checkImpl(const MergeTreeSettings & current_settings, Field new_value; if (!getNewValueToCheck(current_settings, change, new_value, reaction == THROW_ON_VIOLATION)) return false; - return getMergeTreeChecker(change.name).check(change, new_value, reaction); + return getMergeTreeChecker(change.name).check(change, new_value, reaction, SettingSource::QUERY); } -bool SettingsConstraints::Checker::check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const +bool SettingsConstraints::Checker::check(SettingChange & change, + const Field & new_value, + ReactionOnViolation reaction, + SettingSource source) const { if (!explain.empty()) { @@ -326,6 +367,14 @@ bool SettingsConstraints::Checker::check(SettingChange & change, const Field & n change.value = max_value; } + if (!getSettingSourceRestrictions(setting_name).isSourceAllowed(source)) + { + if (reaction == THROW_ON_VIOLATION) + throw Exception(ErrorCodes::READONLY, "Setting {} is not allowed to be set by {}", setting_name, toString(source)); + else + return false; + } + return true; } diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index 667d57a90ae..d09e60cc9d5 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace Poco::Util { @@ -73,17 +74,18 @@ public: void merge(const SettingsConstraints & other); /// Checks whether `change` violates these constraints and throws an exception if so.
- void check(const Settings & current_settings, const SettingsProfileElements & profile_elements) const; - void check(const Settings & current_settings, const SettingChange & change) const; - void check(const Settings & current_settings, const SettingsChanges & changes) const; - void check(const Settings & current_settings, SettingsChanges & changes) const; + void check(const Settings & current_settings, const SettingsProfileElements & profile_elements, SettingSource source) const; + void check(const Settings & current_settings, const SettingChange & change, SettingSource source) const; + void check(const Settings & current_settings, const SettingsChanges & changes, SettingSource source) const; + void check(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const; /// Checks whether `change` violates these constraints and throws an exception if so. (setting short name is expected inside `changes`) void check(const MergeTreeSettings & current_settings, const SettingChange & change) const; void check(const MergeTreeSettings & current_settings, const SettingsChanges & changes) const; /// Checks whether `change` violates these and clamps the `change` if so. - void clamp(const Settings & current_settings, SettingsChanges & changes) const; + void clamp(const Settings & current_settings, SettingsChanges & changes, SettingSource source) const; + friend bool operator ==(const SettingsConstraints & left, const SettingsConstraints & right); friend bool operator !=(const SettingsConstraints & left, const SettingsConstraints & right) { return !(left == right); } @@ -133,7 +135,10 @@ private: {} // Perform checking - bool check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const; + bool check(SettingChange & change, + const Field & new_value, + ReactionOnViolation reaction, + SettingSource source) const; }; struct StringHash @@ -145,7 +150,11 @@ private: } }; - bool checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const; + bool checkImpl(const Settings & current_settings, + SettingChange & change, + ReactionOnViolation reaction, + SettingSource source) const; + bool checkImpl(const MergeTreeSettings & current_settings, SettingChange & change, ReactionOnViolation reaction) const; Checker getChecker(const Settings & current_settings, std::string_view setting_name) const; diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 1723f85dc16..00e0ebd8b91 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -124,6 +124,9 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p if (e.code() == ErrorCodes::DEADLOCK_AVOIDED) continue; + /// Client can successfully connect to the server and + /// get ErrorCodes::USER_SESSION_LIMIT_EXCEEDED for suggestion connection. + /// We should not use std::cerr here, because this method works concurrently with the main thread. /// WriteBufferFromFileDescriptor will write directly to the file descriptor, avoiding data race on std::cerr. 
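The new READONLY path in SettingsConstraints above boils down to an allow-list keyed by setting name with a permissive default: any setting that has no entry in SETTINGS_SOURCE_RESTRICTIONS may be changed from any source, so only max_sessions_for_user rejects query-level changes. A minimal standalone sketch of the same pattern follows; the enum values and the restricted setting name mirror this patch, while SOURCES_COUNT, the SourceRestrictions type, and main() are illustrative additions, not code from the patch:

    // source_restrictions_sketch.cpp -- illustrative only, not part of the patch.
    #include <bitset>
    #include <initializer_list>
    #include <iostream>
    #include <string_view>
    #include <unordered_map>

    // Mirrors the SettingSource values used throughout this patch;
    // SOURCES_COUNT is an assumption added here only to size the bitset.
    enum SettingSource { QUERY, PROFILE, ROLE, USER, SOURCES_COUNT };

    struct SourceRestrictions
    {
        SourceRestrictions() { allowed.set(); }  // default: any source may change the setting

        SourceRestrictions(std::initializer_list<SettingSource> sources)
        {
            for (auto s : sources)
                allowed[s] = true;  // restricted: only the listed sources are allowed
        }

        bool isAllowed(SettingSource s) const { return allowed[s]; }

        std::bitset<SOURCES_COUNT> allowed;
    };

    // Settings without an entry fall back to the permissive default above.
    const std::unordered_map<std::string_view, SourceRestrictions> RESTRICTIONS =
    {
        {"max_sessions_for_user", {PROFILE}},
    };

    bool isSourceAllowed(std::string_view name, SettingSource source)
    {
        auto it = RESTRICTIONS.find(name);
        return it == RESTRICTIONS.end() || it->second.isAllowed(source);
    }

    int main()
    {
        std::cout << isSourceAllowed("max_sessions_for_user", QUERY) << '\n';   // 0 -> SET ... throws READONLY
        std::cout << isSourceAllowed("max_sessions_for_user", PROFILE) << '\n'; // 1 -> profiles may set it
        std::cout << isSourceAllowed("max_threads", QUERY) << '\n';             // 1 -> unrestricted setting
    }

Keeping the default permissive means the table only enumerates the exceptional settings, so restricting another setting to a given source is a one-line addition to the map.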
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index ae8d5f8796d..393486f805c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -582,6 +582,7 @@ M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \ M(698, INVALID_REDIS_STORAGE_TYPE) \ M(699, INVALID_REDIS_TABLE_STRUCTURE) \ + M(700, USER_SESSION_LIMIT_EXCEEDED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/SettingSource.h b/src/Common/SettingSource.h new file mode 100644 index 00000000000..38e1bcae295 --- /dev/null +++ b/src/Common/SettingSource.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +namespace DB +{ + enum SettingSource + { + /// Query or session change: + /// SET = + /// SELECT ... SETTINGS [ = = = = ] + /// ALTER ROLE ... SETTINGS [ = = ] + /// ALTER USER ... SETTINGS [ = checkSettingsConstraints(*settings_from_query); + getContext()->checkSettingsConstraints(*settings_from_query, SettingSource::ROLE); } if (!query.cluster.empty()) diff --git a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp index 0727b6f2182..8a79bab0b0d 100644 --- a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp @@ -54,7 +54,7 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() settings_from_query = SettingsProfileElements{*query.settings, access_control}; if (!query.attach) - getContext()->checkSettingsConstraints(*settings_from_query); + getContext()->checkSettingsConstraints(*settings_from_query, SettingSource::PROFILE); } if (!query.cluster.empty()) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 165937560cc..475ee270506 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -133,7 +133,7 @@ BlockIO InterpreterCreateUserQuery::execute() settings_from_query = SettingsProfileElements{*query.settings, access_control}; if (!query.attach) - getContext()->checkSettingsConstraints(*settings_from_query); + getContext()->checkSettingsConstraints(*settings_from_query, SettingSource::USER); } if (!query.cluster.empty()) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d1b88988e5e..b10f85a2197 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -158,6 +159,7 @@ namespace CurrentMetrics extern const Metric IOWriterThreadsActive; } + namespace DB { @@ -276,6 +278,7 @@ struct ContextSharedPart : boost::noncopyable mutable QueryCachePtr query_cache; /// Cache of query results. mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. ProcessList process_list; /// Executing queries at the moment. 
+ SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) @@ -739,6 +742,9 @@ std::unique_lock Context::getLock() const ProcessList & Context::getProcessList() { return shared->process_list; } const ProcessList & Context::getProcessList() const { return shared->process_list; } OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; } + +SessionTracker & Context::getSessionTracker() { return shared->session_tracker; } + MergeList & Context::getMergeList() { return shared->merge_list; } const MergeList & Context::getMergeList() const { return shared->merge_list; } MovesList & Context::getMovesList() { return shared->moves_list; } @@ -1094,7 +1100,7 @@ void Context::setUser(const UUID & user_id_, bool set_current_profiles_, bool se std::optional params; { auto lock = getLock(); - params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info}); + params.emplace(ContextAccessParams{user_id_, /* full_access= */ false, /* use_default_roles = */ true, {}, settings, current_database, client_info }); } /// `temp_access` is used here only to extract information about the user, not to actually check access. /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. @@ -1157,13 +1163,6 @@ std::optional Context::getUserID() const } -void Context::setQuotaKey(String quota_key_) -{ - auto lock = getLock(); - client_info.quota_key = std::move(quota_key_); -} - - void Context::setCurrentRoles(const std::vector & current_roles_) { auto lock = getLock(); @@ -1303,7 +1302,7 @@ void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, boo { auto lock = getLock(); if (check_constraints) - checkSettingsConstraints(profiles_info.settings); + checkSettingsConstraints(profiles_info.settings, SettingSource::PROFILE); applySettingsChanges(profiles_info.settings); settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); } @@ -1857,29 +1856,29 @@ void Context::applySettingsChanges(const SettingsChanges & changes) } -void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const +void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements, source); } -void Context::checkSettingsConstraints(const SettingChange & change) const +void Context::checkSettingsConstraints(const SettingChange & change, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change, source); } -void Context::checkSettingsConstraints(const SettingsChanges & changes) const +void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void 
Context::checkSettingsConstraints(SettingsChanges & changes) const +void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::clampToSettingsConstraints(SettingsChanges & changes) const +void Context::clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes, source); } void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 0d567816ec9..676eb8412e5 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -202,6 +203,8 @@ using MergeTreeMetadataCachePtr = std::shared_ptr<MergeTreeMetadataCache>; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr<PreparedSetsCache>; +class SessionTracker; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -539,8 +542,6 @@ public: String getUserName() const; - void setQuotaKey(String quota_key_); - void setCurrentRoles(const std::vector<UUID> & current_roles_); void setCurrentRolesDefault(); boost::container::flat_set<UUID> getCurrentRoles() const; @@ -735,11 +736,11 @@ public: void applySettingsChanges(const SettingsChanges & changes); /// Checks the constraints. - void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const; - void checkSettingsConstraints(const SettingChange & change) const; - void checkSettingsConstraints(const SettingsChanges & changes) const; - void checkSettingsConstraints(SettingsChanges & changes) const; - void clampToSettingsConstraints(SettingsChanges & changes) const; + void checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const; + void checkSettingsConstraints(const SettingChange & change, SettingSource source) const; + void checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const; + void checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const; + void clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const; void checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const; /// Reset settings to default value @@ -861,6 +862,8 @@ public: OvercommitTracker * getGlobalOvercommitTracker() const; + SessionTracker & getSessionTracker(); + MergeList & getMergeList(); const MergeList & getMergeList() const; diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index e9118b747e5..2c0baa0d4b3 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -15,7 +15,7 @@ namespace DB BlockIO InterpreterSetQuery::execute() { const auto & ast = query_ptr->as<ASTSetQuery &>(); - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); auto session_context = getContext()->getSessionContext(); session_context->applySettingsChanges(ast.changes);
session_context->addQueryParameters(ast.query_parameters); @@ -28,7 +28,7 @@ void InterpreterSetQuery::executeForCurrentContext(bool ignore_setting_constrain { const auto & ast = query_ptr->as(); if (!ignore_setting_constraints) - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); getContext()->applySettingsChanges(ast.changes); getContext()->resetSettingsToDefaultValue(ast.default_settings); } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index cadf619700c..de2a779b740 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -3,11 +3,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -200,7 +202,6 @@ private: LOG_TEST(log, "Schedule closing session with session_id: {}, user_id: {}", session.key.second, session.key.first); - } void cleanThread() @@ -336,6 +337,9 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So if (session_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "If there is a session context it must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before authentication finish"); + auto address = address_; if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) address = Poco::Net::SocketAddress{"127.0.0.1", 0}; @@ -490,6 +494,8 @@ ContextMutablePtr Session::makeSessionContext() throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating session context with user_id: {}", toString(auth_id), toString(*user_id)); @@ -503,13 +509,17 @@ ContextMutablePtr Session::makeSessionContext() prepared_client_info.reset(); /// Set user information for the new context: current profiles, roles, access rights. - if (user_id) - new_session_context->setUser(*user_id); + new_session_context->setUser(*user_id); /// Session context is ready. 
session_context = new_session_context; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + {}, + session_context->getSettingsRef().max_sessions_for_user); + return session_context; } @@ -521,6 +531,8 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", toString(auth_id), session_name_, toString(*user_id)); @@ -541,9 +553,23 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); + auto access = new_session_context->getAccess(); + UInt64 max_sessions_for_user = 0; /// Set user information for the new context: current profiles, roles, access rights. - if (user_id && !new_session_context->getAccess()->tryGetUser()) + if (!access->tryGetUser()) + { new_session_context->setUser(*user_id); + max_sessions_for_user = new_session_context->getSettingsRef().max_sessions_for_user; + } + else + { + // Always get setting from profile + // profile can be changed by ALTER PROFILE during single session + auto settings = access->getDefaultSettings(); + const Field * max_session_for_user_field = settings.tryGet("max_sessions_for_user"); + if (max_session_for_user_field) + max_sessions_for_user = max_session_for_user_field->safeGet(); + } /// Session context is ready. 
session_context = std::move(new_session_context); @@ -551,6 +577,11 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: named_session_created = new_named_session_created; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + { session_name_ }, + max_sessions_for_user); + return session_context; } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 36f811ccd24..51c0e3c71fa 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,8 @@ private: std::shared_ptr named_session; bool named_session_created = false; + SessionTracker::SessionTrackerHandle session_tracker_handle; + Poco::Logger * log = nullptr; }; diff --git a/src/Interpreters/SessionTracker.cpp b/src/Interpreters/SessionTracker.cpp new file mode 100644 index 00000000000..4636766e288 --- /dev/null +++ b/src/Interpreters/SessionTracker.cpp @@ -0,0 +1,62 @@ +#include "SessionTracker.h" + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int USER_SESSION_LIMIT_EXCEEDED; +} + +SessionTracker::Session::Session(SessionTracker & tracker_, + const UUID& user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept + : tracker(tracker_), user_id(user_id_), session_info_iter(session_info_iter_) +{ +} + +SessionTracker::Session::~Session() +{ + tracker.stopTracking(user_id, session_info_iter); +} + +SessionTracker::SessionTrackerHandle +SessionTracker::trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + if (sessions_for_user_iter == sessions_for_user.end()) + sessions_for_user_iter = sessions_for_user.emplace(user_id, SessionInfos()).first; + + SessionInfos & session_infos = sessions_for_user_iter->second; + if (max_sessions_for_user && session_infos.size() >= max_sessions_for_user) + { + throw Exception(ErrorCodes::USER_SESSION_LIMIT_EXCEEDED, + "User {} has overflown session count {}", + toString(user_id), + max_sessions_for_user); + } + + session_infos.emplace_front(session_info); + + return std::make_unique(*this, user_id, session_infos.begin()); +} + +void SessionTracker::stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + chassert(sessions_for_user_iter != sessions_for_user.end()); + + sessions_for_user_iter->second.erase(session_info_iter); + if (sessions_for_user_iter->second.empty()) + sessions_for_user.erase(sessions_for_user_iter); +} + +} diff --git a/src/Interpreters/SessionTracker.h b/src/Interpreters/SessionTracker.h new file mode 100644 index 00000000000..0827213aeed --- /dev/null +++ b/src/Interpreters/SessionTracker.h @@ -0,0 +1,60 @@ +#pragma once + +#include "ClientInfo.h" + +#include +#include +#include +#include + +namespace DB +{ + +struct SessionInfo +{ + const String session_id; +}; + +using SessionInfos = std::list; + +using SessionsForUser = std::unordered_map; + +class SessionTracker; + +class SessionTracker +{ +public: + class Session : boost::noncopyable + { + public: + explicit Session(SessionTracker & tracker_, + const UUID & user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept; + + ~Session(); + + private: + friend class SessionTracker; + + SessionTracker & 
tracker; + const UUID user_id; + const SessionInfos::const_iterator session_info_iter; + }; + + using SessionTrackerHandle = std::unique_ptr; + + SessionTrackerHandle trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user); + +private: + /// disallow manual messing with session tracking + friend class Session; + + std::mutex mutex; + SessionsForUser sessions_for_user TSA_GUARDED_BY(mutex); + + void stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter); +}; + +} diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 67d30012b0e..3370a8c009b 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -833,7 +833,7 @@ namespace { settings_changes.push_back({key, value}); } - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); query_context->applySettingsChanges(settings_changes); query_context->setCurrentQueryId(query_info.query_id()); @@ -1118,7 +1118,7 @@ namespace SettingsChanges settings_changes; for (const auto & [key, value] : external_table.settings()) settings_changes.push_back({key, value}); - external_table_context->checkSettingsConstraints(settings_changes); + external_table_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a0bfcd49dfd..ff5690a3b07 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -764,7 +764,7 @@ void HTTPHandler::processQuery( context->setDefaultFormat(default_format); /// For external data we also want settings - context->checkSettingsConstraints(settings_changes); + context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a747f06f1ce..5f3a7614eee 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -184,14 +184,17 @@ void TCPHandler::runImpl() try { receiveHello(); + + /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) + session->makeSessionContext(); + sendHello(); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); - if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) { - session->makeSessionContext(); - /// If session created, then settings in session context has been updated. /// So it's better to update the connection settings for flexibility. 
extractConnectionSettingsFromContext(session->sessionContext()); @@ -1181,7 +1184,6 @@ std::unique_ptr TCPHandler::makeSession() res->setClientName(client_name); res->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); res->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); - res->setQuotaClientKey(quota_key); res->setClientInterface(interface); return res; @@ -1274,11 +1276,10 @@ void TCPHandler::receiveHello() void TCPHandler::receiveAddendum() { if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) - { readStringBinary(quota_key, *in); - if (!is_interserver_mode) - session->setQuotaClientKey(quota_key); - } + + if (!is_interserver_mode) + session->setQuotaClientKey(quota_key); } @@ -1591,12 +1592,12 @@ void TCPHandler::receiveQuery() if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { /// Throw an exception if the passed settings violate the constraints. - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); } else { /// Quietly clamp to the constraints if it's not an initial query. - query_context->clampToSettingsConstraints(settings_changes); + query_context->clampToSettingsConstraints(settings_changes, SettingSource::QUERY); } query_context->applySettingsChanges(settings_changes); diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 6e1604f4eb5..cb2b98937ca 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -81,5 +81,15 @@ "test_system_flush_logs/test.py::test_log_buffer_size_rows_flush_threshold", "test_system_flush_logs/test.py::test_log_max_size", "test_crash_log/test.py::test_pkill_query_log", - "test_crash_log/test.py::test_pkill" + "test_crash_log/test.py::test_pkill", + + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_postgres", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_mysql", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http_named_session", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_grpc", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp_and_others", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_end_session" ] diff --git a/tests/integration/test_profile_max_sessions_for_user/__init__.py b/tests/integration/test_profile_max_sessions_for_user/__init__.py new file mode 100755 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem b/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem new file mode 100755 index 00000000000..2e6cee0798d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR +XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5 +Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb 
+9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf +slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb +dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg== +-----END DH PARAMETERS----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/log.xml b/tests/integration/test_profile_max_sessions_for_user/configs/log.xml new file mode 100644 index 00000000000..22f95a8bd5d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/log.xml @@ -0,0 +1,9 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml new file mode 100644 index 00000000000..3123c4a3d9c --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml @@ -0,0 +1,9 @@ + + 5433 + 9001 + 9100 + + + false + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.crt b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt new file mode 100755 index 00000000000..070d37f3b77 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJANhP897Se2gmMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0yMDA0MTgyMTE2NDBaFw0yMTA0MTgyMTE2NDBaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAM92kcojQoMsjZ9YGhPMY6h/fDUsZeSKHLxgqE6wbmfU1oZKCPWqnvl+4n0J +pnT5h1ETxxYZLepimKq0DEVPUTmCl0xmcKbtUNiaTUKYKsdita6b2vZCX9wUPN9p +2Kjnm41l+aZNqIEBhIgHNWg9qowi20y0EIXR79jQLwwaInHAaJLZxVsqY2zjQ/D7 +1Zh82MXud7iqxBQiEfw9Cz35UFA239R8QTlPkVQfsN1gfLxnLk24QUX3o+hbUI1g +nlSpyYDHYQlOmwz8doDs6THHAZNJ4bPE9xHNFpw6dGZdbtH+IKQ/qRZIiOaiNuzJ +IOHl6XQDRDkW2LMTiCQ6fjC7Pz8CAwEAAaNQME4wHQYDVR0OBBYEFFvhaA/Eguyf +BXkMj8BkNLBqMnz2MB8GA1UdIwQYMBaAFFvhaA/EguyfBXkMj8BkNLBqMnz2MAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBACeU/oL48eVAKH7NQntHhRaJ +ZGeQzKIjrSBjFo8BGXD1nJZhUeFsylLrhCkC8/5/3grE3BNVX9bxcGjO81C9Mn4U +t0z13d6ovJjCZSQArtLwgeJGlpH7gNdD3DyT8DQmrqYVnmnB7UmBu45XH1LWGQZr +FAOhGRVs6s6mNj8QlLMgdmsOeOQnsGCMdoss8zV9vO2dc4A5SDSSL2mqGGY4Yjtt +X+XlEhXXnksGyx8NGVOZX4wcj8WeCAj/lihQ7Zh6XYwZH9i+E46ompUwoziZnNPu +2RH63tLNCxkOY2HF5VMlbMmzer3FkhlM6TAZZRPcvSphKPwXK4A33yqc6wnWvpc= +-----END CERTIFICATE----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.key b/tests/integration/test_profile_max_sessions_for_user/configs/server.key new file mode 100755 index 00000000000..b3dee82dcda --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDPdpHKI0KDLI2f +WBoTzGOof3w1LGXkihy8YKhOsG5n1NaGSgj1qp75fuJ9CaZ0+YdRE8cWGS3qYpiq +tAxFT1E5gpdMZnCm7VDYmk1CmCrHYrWum9r2Ql/cFDzfadio55uNZfmmTaiBAYSI +BzVoPaqMIttMtBCF0e/Y0C8MGiJxwGiS2cVbKmNs40Pw+9WYfNjF7ne4qsQUIhH8 +PQs9+VBQNt/UfEE5T5FUH7DdYHy8Zy5NuEFF96PoW1CNYJ5UqcmAx2EJTpsM/HaA +7OkxxwGTSeGzxPcRzRacOnRmXW7R/iCkP6kWSIjmojbsySDh5el0A0Q5FtizE4gk +On4wuz8/AgMBAAECggEAJ54J2yL+mZQRe2NUn4FBarTloDXZQ1pIgISov1Ybz0Iq +sTxEF728XAKp95y3J9Fa0NXJB+RJC2BGrRpy2W17IlNY1yMc0hOxg5t7s4LhcG/e +J/jlSG+GZL2MnlFVKXQJFWhq0yIzUmdayqstvLlB7z7cx/n+yb88YRfoVBRNjZEL +Tdrsw+087igDjrIxZJ3eMN5Wi434n9s4yAoRQC1bP5wcWx0gD4MzdmL8ip6suiRc +LRuBAhV/Op812xlxUhrF5dInUM9OLlGTXpUzexAS8Cyy7S4bfkW2BaCxTF7I7TFw 
+Whx28CKn/G49tIuU0m6AlxWbXpLVePTFyMb7RJz5cQKBgQD7VQd2u3HM6eE3PcXD +p6ObdLTUk8OAJ5BMmADFc71W0Epyo26/e8KXKGYGxE2W3fr13y+9b0fl5fxZPuhS +MgvXEO7rItAVsLcp0IzaqY0WUee2b4XWPAU0XuPqvjYMpx8H5OEHqFK6lhZysAqM +X7Ot3/Hux9X0MC4v5a/HNbDUOQKBgQDTUPaP3ADRrmpmE2sWuzWEnCSEz5f0tCLO +wTqhV/UraWUNlAbgK5NB790IjH/gotBSqqNPLJwJh0LUfClKM4LiaHsEag0OArOF +GhPMK1Ohps8c2RRsiG8+hxX2HEHeAVbkouEDPDiHdIW/92pBViDoETXL6qxDKbm9 +LkOcVeDfNwKBgQChh1xsqrvQ/t+IKWNZA/zahH9TwEP9sW/ESkz0mhYuHWA7nV4o +ItpFW+l2n+Nd+vy32OFN1p9W2iD9GrklWpTRfEiRRqaFyjVt4mMkhaPvnGRXlAVo +Utrldbb1v5ntN9txr2ARE9VXpe53dzzQSxGnxi4vUK/paK3GitAWMCOdwQKBgQCi +hmGsUXQb0P6qVYMGr6PAw2re7t8baLRguoMCdqjs45nCMLh9D2apzvb8TTtJJU/+ +VJlYGqJEPdDrpjcHh8jBo8QBqCM0RGWYGG9jl2syKB6hPGCV/PU6bSE58Y/DVNpk +7NUM7PM5UyhPddY2PC0A78Ole29UFLJzSzLa+b4DTwKBgH9Wh2k4YPnPcRrX89UL +eSwWa1CGq6HWX8Kd5qyz256aeHWuG5nv15+rBt+D7nwajUsqeVkAXz5H/dHuG1xz +jb7RW+pEjx0GVAmIbkM9vOLqEUfHHHPuk4AXCGGZ5sarPiKg4BHKBBsY1dpoO5UH +0j71fRA6zurHnTXDaCLWlUpZ +-----END PRIVATE KEY----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml new file mode 100644 index 00000000000..778d327c460 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml @@ -0,0 +1,17 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + /etc/clickhouse-server/config.d/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/users.xml b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml new file mode 100644 index 00000000000..3bed673b2ca --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml @@ -0,0 +1,16 @@ + + + + 2 + 0 + + + + + + + + 123 + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py new file mode 100755 index 00000000000..9e6a10e7e15 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -0,0 +1,222 @@ +import os + +import grpc +import pymysql.connections +import psycopg2 as py_psql +import pytest +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check + +MAX_SESSIONS_FOR_USER = 2 +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 + +TEST_USER = "test_user" +TEST_PASSWORD = "123" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. 
+proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + "python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} \ + {proto_dir}/clickhouse_grpc.proto".format( + proto_dir=proto_dir, gen_dir=gen_dir + ), + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/ssl_conf.xml", + "configs/dhparam.pem", + "configs/server.crt", + "configs/server.key", + ], + user_configs=["configs/users.xml"], + env_variables={"UBSAN_OPTIONS": "print_stacktrace=1"}, +) + + +def get_query(name, id): + return f"SELECT '{name}', {id}, sleep(1)" + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +def grpc_query(query_text, channel, session_id_): + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query_text, + session_id=session_id_, + user_name=TEST_USER, + password=TEST_PASSWORD, + ) + + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + return result.output.decode(DEFAULT_ENCODING) + + +def threaded_run_test(sessions): + thread_list = [] + for i in range(len(sessions)): + thread = ThreadWithException(target=sessions[i], args=(i,)) + thread_list.append(thread) + thread.start() + + for thread in thread_list: + thread.join() + + exception_count = 0 + for i in range(len(sessions)): + if thread_list[i].run_exception is not None: + exception_count += 1 + + assert exception_count == 1 + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +class ThreadWithException(threading.Thread): + run_exception = None + + def run(self): + try: + super().run() + except: + self.run_exception = sys.exc_info() + + def join(self): + super().join() + + +def postgres_session(id): + ch = py_psql.connect( + host=instance.ip_address, + port=POSTGRES_SERVER_PORT, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + ) + cur = ch.cursor() + cur.execute(get_query("postgres_session", id)) + cur.fetchall() + + +def mysql_session(id): + client = pymysql.connections.Connection( + host=instance.ip_address, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + cursor.execute(get_query("mysql_session", id)) + cursor.fetchall() + + +def tcp_session(id): + instance.query(get_query("tcp_session", id), user=TEST_USER, password=TEST_PASSWORD) + + +def http_session(id): + instance.http_query( + get_query("http_session", id), user=TEST_USER, password=TEST_PASSWORD + ) + + +def http_named_session(id): + instance.http_query( + get_query("http_named_session", id), + user=TEST_USER, + password=TEST_PASSWORD, + params={"session_id": id}, + ) + + +def grpc_session(id): + grpc_query( + get_query("grpc_session", id), grpc_create_insecure_channel(), f"session_{id}" + ) + + +def test_profile_max_sessions_for_user_tcp(started_cluster): + threaded_run_test([tcp_session] * 3) + + +def 
test_profile_max_sessions_for_user_postgres(started_cluster): + threaded_run_test([postgres_session] * 3) + + +def test_profile_max_sessions_for_user_mysql(started_cluster): + threaded_run_test([mysql_session] * 3) + + +def test_profile_max_sessions_for_user_http(started_cluster): + threaded_run_test([http_session] * 3) + + +def test_profile_max_sessions_for_user_http_named_session(started_cluster): + threaded_run_test([http_named_session] * 3) + + +def test_profile_max_sessions_for_user_grpc(started_cluster): + threaded_run_test([grpc_session] * 3) + + +def test_profile_max_sessions_for_user_tcp_and_others(started_cluster): + threaded_run_test([tcp_session, grpc_session, grpc_session]) + threaded_run_test([tcp_session, http_session, http_session]) + threaded_run_test([tcp_session, mysql_session, mysql_session]) + threaded_run_test([tcp_session, postgres_session, postgres_session]) + threaded_run_test([tcp_session, http_session, postgres_session]) + threaded_run_test([tcp_session, postgres_session, http_session]) + + +def test_profile_max_sessions_for_user_end_session(started_cluster): + for connection_func in [ + tcp_session, + http_session, + grpc_session, + mysql_session, + postgres_session, + ]: + threaded_run_test([connection_func] * MAX_SESSIONS_FOR_USER) + threaded_run_test([connection_func] * MAX_SESSIONS_FOR_USER) + + +def test_profile_max_sessions_for_user_setting_in_query(started_cluster): + instance.query_and_get_error("SET max_sessions_for_user = 10") diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference new file mode 100644 index 00000000000..f80f8738ff8 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.reference @@ -0,0 +1,12 @@ +test_alter_profile case: max_session_count 1 alter_sessions_count 1 +test_alter_profile case: max_session_count 2 alter_sessions_count 1 +USER_SESSION_LIMIT_EXCEEDED +test_alter_profile case: max_session_count 1 alter_sessions_count 2 +test_alter_profile case: max_session_count 2 alter_sessions_count 2 +READONLY +READONLY +READONLY +READONLY +READONLY +READONLY +READONLY diff --git a/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh new file mode 100755 index 00000000000..546c54a4de9 --- /dev/null +++ b/tests/queries/0_stateless/02832_alter_max_sessions_for_user.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +SESSION_ID_PREFIX="02832_alter_max_sessions_session_$$" +PROFILE="02832_alter_max_sessions_profile_$$" +USER="02832_alter_max_sessions_user_$$" +USER2="02832_alter_max_sessions_user_two_$$" +ROLE="02832_alter_max_sessions_role_$$" + +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER}'" +${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"CREATE SETTINGS PROFILE ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"CREATE USER '${USER}' SETTINGS PROFILE '${PROFILE}'" + +function test_alter_profile() +{ + local max_session_count="$1" + local alter_sessions_count="$2" + echo $"test_alter_profile case: max_session_count ${max_session_count} alter_sessions_count ${alter_sessions_count}" + + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${max_session_count}" + + # Create sesssions with $max_session_count resriction + for ((i = 1 ; i <= ${max_session_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + # Skip output from this query + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=0" --data-binary "SELECT 1" > /dev/null + done + + # Update resriction to $alter_sessions_count + ${CLICKHOUSE_CLIENT} -q $"ALTER SETTINGS PROFILE ${PROFILE} SETTINGS max_sessions_for_user = ${alter_sessions_count}" + + # Simultaneous sessions should use max settings from profile ($alter_sessions_count) + for ((i = 1 ; i <= ${max_session_count} ; i++)); do + local session_id="${SESSION_ID_PREFIX}_${i}" + # ignore select 1, we need only errors + ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${USER}&session_id=${session_id}&session_check=1" --data-binary "select sleep(0.3)" | grep -o -m 1 'USER_SESSION_LIMIT_EXCEEDED' & + done + + wait +} + +test_alter_profile 1 1 +test_alter_profile 2 1 +test_alter_profile 1 2 +test_alter_profile 2 2 + +${CLICKHOUSE_CLIENT} -q "SELECT 1 SETTINGS max_sessions_for_user = 1" 2>&1 | grep -m 1 -o 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"SET max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} --max_sessions_for_user=1 -q $"SELECT 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +# max_sessions_for_user is profile setting +${CLICKHOUSE_CLIENT} -q $"CREATE USER ${USER2} SETTINGS max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"ALTER USER ${USER} SETTINGS max_sessions_for_user = 1" 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"CREATE ROLE ${ROLE} SETTINGS max_sessions_for_user = 1" 2>&1 | grep -o -m 1 'READONLY' | head -1 +${CLICKHOUSE_CLIENT} -q $"CREATE ROLE ${ROLE}" +${CLICKHOUSE_CLIENT} -q $"ALTER ROLE ${ROLE} SETTINGS max_sessions_for_user = 1 " 2>&1 | grep -o -m 1 'READONLY' | head -1 + +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER}'" +${CLICKHOUSE_CLIENT} -q $"DROP USER IF EXISTS '${USER2}'" +${CLICKHOUSE_CLIENT} -q $"DROP PROFILE IF EXISTS ${PROFILE}" +${CLICKHOUSE_CLIENT} -q $"DROP ROLE IF EXISTS ${ROLE}" From afb76101b797a144bc2e8d6cff5109c9036fc46a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 31 Jul 2023 05:23:28 +0200 Subject: [PATCH 140/151] Remove obsolete part of a check name --- .github/workflows/master.yml | 2 +- .github/workflows/pull_request.yml | 2 +- tests/ci/ci_config.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index e5b797beebd..ae1862e327f 100644 --- 
a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -3643,7 +3643,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index dd834959578..d97b9975c3c 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4541,7 +4541,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index bea654ca76f..517e40fd2d6 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -346,7 +346,7 @@ CI_CONFIG = { "Compatibility check (aarch64)": { "required_build": "package_aarch64", }, - "Unit tests (release-clang)": { + "Unit tests (release)": { "required_build": "binary_release", }, "Unit tests (asan)": { @@ -509,7 +509,7 @@ REQUIRED_CHECKS = [ "Style Check", "Unit tests (asan)", "Unit tests (msan)", - "Unit tests (release-clang)", + "Unit tests (release)", "Unit tests (tsan)", "Unit tests (ubsan)", ] From efad90d0f2731142677287495f4d8f47a4c32b51 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 31 Jul 2023 07:26:36 +0200 Subject: [PATCH 141/151] Maybe fix TLS tests --- tests/integration/test_ssl_cert_authentication/test.py | 6 +++--- tests/integration/test_tlsv1_3/test.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index ff2de7491e1..fe6eb52e50e 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -208,7 +208,7 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -314,7 +314,7 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -334,7 +334,7 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index f5c2be51ed7..094804bf963 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -96,7 +96,7 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", 
cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -202,7 +202,7 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -222,7 +222,7 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" From 010f3f1db1fc8adad0a74f424e153c28f7072e16 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 31 Jul 2023 05:39:49 +0000 Subject: [PATCH 142/151] Automatic style fix --- .../integration/test_ssl_cert_authentication/test.py | 12 +++++++++--- tests/integration/test_tlsv1_3/test.py | 12 +++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index fe6eb52e50e..d1ae39ca378 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -208,7 +208,9 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -314,7 +316,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -334,7 +338,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index 094804bf963..87c03c56f91 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -96,7 +96,9 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -202,7 +204,9 @@ def test_https_non_ssl_auth(): 
cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -222,7 +226,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and (("Broken pipe" in err_str) or ("EOF occurred" in err_str)): + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" From 2e0d82765c2dbb4e021c9f174eaba8637f74401f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 31 Jul 2023 11:43:04 +0300 Subject: [PATCH 143/151] Move "reconfig" to experimental in the changelog See https://github.com/ClickHouse/ClickHouse/issues/52798 Automatic continuous fuzzing has found an issue. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f401b346726..bf26708ebb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,7 +23,6 @@ * Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). * Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). * Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). -* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). * Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). * Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). * Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). @@ -124,6 +123,7 @@ * (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). * Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. 
[#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). * The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete. #### Build/Testing/Packaging Improvement * Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). From a473dc4b51a62f8b44137a9b84c8dc09f6b97542 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 31 Jul 2023 09:24:06 +0000 Subject: [PATCH 144/151] Wait for response --- src/Coordination/KeeperServer.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index a4c3d91e1c9..88b9f1cedb4 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -794,8 +794,14 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action) std::lock_guard _{server_write_mutex}; if (const auto * add = std::get_if<AddRaftServer>(&action)) - return raft_instance->get_srv_config(add->id) != nullptr - || raft_instance->add_srv(static_cast<nuraft::srv_config>(*add))->get_accepted(); + { + if (raft_instance->get_srv_config(add->id) != nullptr) + return true; + + auto resp = raft_instance->add_srv(static_cast<nuraft::srv_config>(*add)); + resp->get(); + return resp->get_accepted(); + } else if (const auto * remove = std::get_if<RemoveRaftServer>(&action)) { if (remove->id == raft_instance->get_leader()) @@ -807,8 +813,12 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action) return false; } - return raft_instance->get_srv_config(remove->id) == nullptr - || raft_instance->remove_srv(remove->id)->get_accepted(); + if (raft_instance->get_srv_config(remove->id) == nullptr) + return true; + + auto resp = raft_instance->remove_srv(remove->id); + resp->get(); + return resp->get_accepted(); } else if (const auto * update = std::get_if<UpdateRaftServerPriority>(&action)) { From 7c49105cd56395bde38f0d7aff862c65c78c6989 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 31 Jul 2023 12:27:35 +0200 Subject: [PATCH 145/151] Allow OOM in Stress and Upgrade checks --- docker/test/stress/run.sh | 6 ++++++ docker/test/upgrade/run.sh | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 9217fcfddd9..a2264b8f3e6 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -233,4 +233,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 13c352d5d41..d6cd6987e83 100644 ---
a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -231,4 +231,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps From 985b2a010af9ba5de9e72fc4623ea05e7d91c4b2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 31 Jul 2023 15:17:47 +0300 Subject: [PATCH 146/151] Add a tool to upload `-ftime-trace` to ClickHouse (#52776) * Add a tool to upload `-ftime-trace` to ClickHouse * Add a tool to upload `-ftime-trace` to ClickHouse * Add a tool to upload `-ftime-trace` to ClickHouse --- .../prepare-time-trace/prepare-time-trace.sh | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100755 utils/prepare-time-trace/prepare-time-trace.sh diff --git a/utils/prepare-time-trace/prepare-time-trace.sh b/utils/prepare-time-trace/prepare-time-trace.sh new file mode 100755 index 00000000000..300a32b0fd1 --- /dev/null +++ b/utils/prepare-time-trace/prepare-time-trace.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# This script transforms the output of clang's -ftime-trace JSON files into a format to upload to ClickHouse + +# Example: +# mkdir time_trace +# utils/prepare-time-trace/prepare-time-trace.sh build time_trace + +# See also https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview + +< \"${OUTPUT_DIR}/\$\$\" +" + +# Now you can upload it as follows: + +#cat "$OUTPUT_DIR"/* | clickhouse-client --progress --query "INSERT INTO build_time_trace (extra_column_names, file, library, time, pid, tid, ph, ts, dur, cat, name, detail, count, avgMs, args_name) FORMAT JSONCompactEachRow" From efba3a21139adf8004d975b2d276edfa39782b19 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 31 Jul 2023 14:32:09 +0200 Subject: [PATCH 147/151] Add more logging and touch test for materialize mysql --- src/Common/mysqlxx/Pool.cpp | 3 +++ src/Common/mysqlxx/mysqlxx/Pool.h | 2 +- src/Databases/MySQL/DatabaseMaterializedMySQL.cpp | 2 ++ src/Databases/MySQL/DatabaseMaterializedMySQL.h | 1 + src/Databases/MySQL/MaterializedMySQLSyncThread.cpp | 6 +++++- .../integration/test_materialized_mysql_database/test.py | 8 -------- 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index 64a69c48e1d..43df0aa6708 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -153,7 +153,10 @@ Pool::Entry Pool::get(uint64_t wait_timeout) for (auto & connection : connections) { if (connection->ref_count == 0) + { + logger.test("Found free connection in pool, returning it to the caller"); return Entry(connection, this); + } } logger.trace("(%s): Trying to allocate a new connection.", getDescription()); diff --git a/src/Common/mysqlxx/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h index c2190fba684..52d116e39ce 100644 --- a/src/Common/mysqlxx/mysqlxx/Pool.h +++ b/src/Common/mysqlxx/mysqlxx/Pool.h @@ -26,7 +26,7 @@ namespace mysqlxx * * void thread() * { - * mysqlxx::Pool::Entry connection = pool.Get(); + * mysqlxx::Pool::Entry connection = pool.Get(); * std::string s = connection->query("SELECT 'Hello, world!' 
AS world").use().fetch()["world"].getString(); * } * TODO: simplify with PoolBase. diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 653c2dc27b6..f7e669d9feb 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -65,6 +65,7 @@ void DatabaseMaterializedMySQL::setException(const std::exception_ptr & exceptio void DatabaseMaterializedMySQL::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) { + LOG_TRACE(log, "Starting MaterializeMySQL tables"); DatabaseAtomic::startupTables(thread_pool, mode); if (mode < LoadingStrictnessLevel::FORCE_ATTACH) @@ -122,6 +123,7 @@ void DatabaseMaterializedMySQL::alterTable(ContextPtr context_, const StorageID void DatabaseMaterializedMySQL::drop(ContextPtr context_) { + LOG_TRACE(log, "Dropping MaterializeMySQL database"); /// Remove metadata info fs::path metadata(getMetadataPath() + "/.metadata"); diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 3698abf5542..60a88ea0d67 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 673bd155f77..c9e10b27caa 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -1,3 +1,4 @@ +#include "Common/logger_useful.h" #include "config.h" #if USE_MYSQL @@ -499,7 +500,10 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { throw; } - catch (const mysqlxx::ConnectionFailed &) {} + catch (const mysqlxx::ConnectionFailed & ex) + { + LOG_TRACE(log, "Connection to MySQL failed {}", ex.displayText()); + } catch (const mysqlxx::BadQuery & e) { // Lost connection to MySQL server during query diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 1fd09f733f0..5096e0a03b7 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -1,17 +1,11 @@ -import os -import os.path as p import time -import pwd -import re import pymysql.cursors import pytest from helpers.cluster import ( ClickHouseCluster, ClickHouseInstance, get_docker_compose_path, - run_and_check, ) -import docker import logging from . import materialized_with_ddl @@ -63,8 +57,6 @@ class MySQLConnection: user="root", password="clickhouse", ip_address=None, - docker_compose=None, - project_name=cluster.project_name, ): self.user = user self.port = port From 8f5d0c0cd4b150d32a1257d18ee77cd582ac43cd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 31 Jul 2023 13:03:04 +0000 Subject: [PATCH 148/151] Revert "Merge pull request #52514 from azat/tests/fix-test_version_update_after_mutation" This reverts commit dd491eeae2e1ee030f4db5d3f0837067a2ee0ca8, reversing changes made to b225f9c34bf6064d246ede33f98e3c573fcda142. 
--- tests/integration/helpers/cluster.py | 9 --------- .../force_remove_data_recursively_on_drop.xml | 7 ------- .../test_version_update_after_mutation/test.py | 13 +++---------- 3 files changed, 3 insertions(+), 26 deletions(-) delete mode 100644 tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0448eb2437f..eff44de842a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3199,7 +3199,6 @@ class ClickHouseInstance: ): self.name = name self.base_cmd = cluster.base_cmd - self.base_dir = base_path self.docker_id = cluster.get_instance_docker_id(self.name) self.cluster = cluster self.hostname = hostname if hostname is not None else self.name @@ -4194,14 +4193,6 @@ class ClickHouseInstance: ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"] ) - def put_users_config(self, config_path): - """Put new config (useful if you cannot put it at the start)""" - - instance_config_dir = p.abspath(p.join(self.path, "configs")) - users_d_dir = p.abspath(p.join(instance_config_dir, "users.d")) - config_path = p.join(self.base_dir, config_path) - shutil.copy(config_path, users_d_dir) - def create_dir(self): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml deleted file mode 100644 index 7a00648b28e..00000000000 --- a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml +++ /dev/null @@ -1,7 +0,0 @@ -<clickhouse> -<profiles> -<default> -<force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop> -</default> -</profiles> -</clickhouse> diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 416220c93c3..c80205d48c1 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -51,12 +51,6 @@ def start_cluster(): cluster.shutdown() -def restart_node(node): - # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old) - node.put_users_config("configs/force_remove_data_recursively_on_drop.xml") - node.restart_with_latest_version(signal=9, fix_metadata=True) - - def test_mutate_and_upgrade(start_cluster): for node in [node1, node2]: node.query("DROP TABLE IF EXISTS mt") @@ -73,9 +67,8 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader - - restart_node(node1) - restart_node(node2) + node1.restart_with_latest_version(signal=9, fix_metadata=True) + node2.restart_with_latest_version(signal=9, fix_metadata=True) # After hard restart table can be in readonly mode exec_query_with_retry( @@ -131,7 +124,7 @@ def test_upgrade_while_mutation(start_cluster): # (We could be in process of creating some system table, which will leave empty directory on restart, # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files) node3.query("SYSTEM FLUSH LOGS") - restart_node(node3) + node3.restart_with_latest_version(signal=9, fix_metadata=True) # checks for readonly exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60) From b98d54cdf3a2f931040067418797336c6fb218a4 Mon Sep 17 00:00:00
2001 From: Nikolai Kochetov Date: Mon, 31 Jul 2023 13:05:53 +0000 Subject: [PATCH 149/151] Add flushing logs --- tests/integration/test_version_update_after_mutation/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index c80205d48c1..eac214ea99f 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -67,6 +67,8 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader + node1.query("SYSTEM FLUSH LOGS") + node2.query("SYSTEM FLUSH LOGS") node1.restart_with_latest_version(signal=9, fix_metadata=True) node2.restart_with_latest_version(signal=9, fix_metadata=True) From c4a00b8c68ce39c4ee325ab8b29ea86e58dea8af Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 31 Jul 2023 15:39:07 +0000 Subject: [PATCH 150/151] Throw S3Exception whenever possible. --- src/Backups/BackupIO_S3.cpp | 6 +++--- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/IO/S3/copyS3File.cpp | 2 +- src/IO/S3/getObjectInfo.cpp | 2 +- src/Storages/DataLakes/S3MetadataReader.cpp | 4 ++-- src/Storages/StorageS3.cpp | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index d487ec6e80e..40a043be552 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -88,7 +88,7 @@ namespace request.SetMaxKeys(1); auto outcome = client.ListObjects(request); if (!outcome.IsSuccess()) - throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); return outcome.GetResult().GetContents(); } @@ -271,7 +271,7 @@ void BackupWriterS3::removeFile(const String & file_name) request.SetKey(fs::path(s3_uri.key) / file_name); auto outcome = client->DeleteObject(request); if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType())) - throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } void BackupWriterS3::removeFiles(const Strings & file_names) @@ -329,7 +329,7 @@ void BackupWriterS3::removeFilesBatch(const Strings & file_names) auto outcome = client->DeleteObjects(request); if (!outcome.IsSuccess() && !isNotFoundError(outcome.GetError().GetErrorType())) - throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e46ca3d0828..4da7b3e892f 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -135,7 +135,7 @@ private: return result; } - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } diff --git 
a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 2de2ccd0f9f..dda107840cb 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -783,7 +783,7 @@ namespace if (!outcome.IsSuccess()) { abortMultipartUpload(); - throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } return outcome.GetResult().GetCopyPartResult().GetETag(); diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index c652f16ab20..88f79f8d8d5 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -85,7 +85,7 @@ ObjectInfo getObjectInfo( } else if (throw_on_error) { - throw DB::Exception(ErrorCodes::S3_ERROR, + throw S3Exception(error.GetErrorType(), "Failed to get object info: {}. HTTP response code: {}", error.GetMessage(), static_cast<size_t>(error.GetResponseCode())); } diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index f62c440bc2f..ac472c190e4 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -57,8 +57,8 @@ std::vector<String> S3DataLakeMetadataReadHelper::listFiles( { outcome = client->ListObjectsV2(request); if (!outcome.IsSuccess()) - throw Exception( - ErrorCodes::S3_ERROR, + throw S3Exception( + outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", quoteString(bucket), quoteString(base_configuration.url.key), diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index b52150250b8..ebce3a7aeca 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -245,7 +245,7 @@ private: if (!outcome.IsSuccess()) { - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } @@ -1195,7 +1195,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(ErrorCodes::S3_ERROR, "{}: {}", std::to_string(static_cast<int>(err.GetErrorType())), err.GetMessage()); + throw S3Exception(err.GetMessage(), err.GetErrorType()); } for (const auto & error : response.GetResult().GetErrors()) From 4894a357f5a39943db1a95a65b3da507e82df83e Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Mon, 31 Jul 2023 17:49:59 -0600 Subject: [PATCH 151/151] Update datetime.md Correct URLs for settings-formats.md to date_time_input_format and date_time_output_format --- docs/en/sql-reference/data-types/datetime.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index fe279edb709..c99c8791542 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -140,8 +140,8 @@ Time shifts for multiple days.
Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
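
A note on the recurring pattern in patches 141 and 142 above: the wrong-certificate tests all implement the same retry loop, because a rejected TLS handshake can surface as a transient transport error ("Broken pipe" or "EOF occurred") rather than the authentication failure the test actually wants to observe. Below is a minimal sketch of that idiom in isolation. It assumes the module-level execute_query_https helper from those test files; here the helper is stubbed out so the sketch runs on its own, and the stub's behavior is illustrative only.

import logging

import pytest

MAX_RETRY = 5  # same bound the tests above use
TRANSIENT_ERRORS = ("Broken pipe", "EOF occurred")


def execute_query_https(query, user, cert_name):
    # Stand-in for the real helper in the test module, which opens an HTTPS
    # connection to the ClickHouse node using the named client certificate.
    raise OSError("EOF occurred in violation of protocol")


def expect_rejection_with_wrong_cert(user):
    """Re-run the rejected query until the failure is no longer a transient TLS teardown."""
    count = 0
    while True:
        with pytest.raises(Exception) as err:
            execute_query_https("SELECT currentUser()", user=user, cert_name="wrong")
        err_str = str(err.value)
        # Only the transient markers justify another attempt; anything else is
        # the definitive error the caller should assert on.
        if count < MAX_RETRY and any(marker in err_str for marker in TRANSIENT_ERRORS):
            count += 1
            logging.warning(f"Failed attempt with wrong cert, user: {user}, err: {err_str}")
            continue
        return err_str

Collecting the markers in a single tuple keeps the retry condition in one place; the hand-edited variants above repeat the ("Broken pipe" in err_str) or ("EOF occurred" in err_str) expression across six hunks, which is exactly the spelling the automatic style fix in patch 142 then reflowed.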