diff --git a/.github/ISSUE_TEMPLATE/85_bug-report.md b/.github/ISSUE_TEMPLATE/85_bug-report.md index fde5917a8a7..93b2342af70 100644 --- a/.github/ISSUE_TEMPLATE/85_bug-report.md +++ b/.github/ISSUE_TEMPLATE/85_bug-report.md @@ -7,6 +7,8 @@ assignees: '' --- +> Please make sure that the version you're using is still supported (you can find the list [here](https://github.com/ClickHouse/ClickHouse/blob/master/SECURITY.md#scope-and-supported-versions)). + > You have to provide the following information whenever possible. **Describe what's wrong** diff --git a/.gitmodules b/.gitmodules index f790e0f8d5a..904d2cec249 100644 --- a/.gitmodules +++ b/.gitmodules @@ -184,7 +184,7 @@ url = https://github.com/ClickHouse/nanodbc [submodule "contrib/datasketches-cpp"] path = contrib/datasketches-cpp - url = https://github.com/ClickHouse/datasketches-cpp + url = https://github.com/apache/datasketches-cpp [submodule "contrib/yaml-cpp"] path = contrib/yaml-cpp url = https://github.com/ClickHouse/yaml-cpp diff --git a/base/base/defines.h b/base/base/defines.h index 4d3d8796d21..d852f6b9f63 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -150,6 +150,7 @@ # define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it # define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure # define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability +# define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of @@ -177,6 +178,7 @@ # define TSA_ACQUIRE_SHARED(...) # define TSA_TRY_ACQUIRE_SHARED(...) # define TSA_RELEASE_SHARED(...) +# define TSA_SCOPED_LOCKABLE # define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) # define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) diff --git a/base/base/sort.h b/base/base/sort.h index 912545979dc..1a814587763 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -131,3 +131,29 @@ void sort(RandomIt first, RandomIt last) using comparator = std::less; ::sort(first, last, comparator()); } + +/** Try to fast sort elements for common sorting patterns: + * 1. If elements are already sorted. + * 2. If elements are already almost sorted. + * 3. If elements are already sorted in reverse order. + * + * Returns true if fast sort was performed or elements were already sorted, false otherwise. 
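+ *
+ * Illustrative usage (a sketch only; the caller and the fallback shown here are not part of this header):
+ *
+ *     std::vector<int> values = {1, 2, 3, 5, 4};
+ *     if (!trySort(values.begin(), values.end()))
+ *         ::sort(values.begin(), values.end());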
+ */ +template +bool trySort(RandomIt first, RandomIt last, Compare compare) +{ +#ifndef NDEBUG + ::shuffle(first, last); +#endif + + ComparatorWrapper compare_wrapper = compare; + return ::pdqsort_try_sort(first, last, compare_wrapper); +} + +template +bool trySort(RandomIt first, RandomIt last) +{ + using value_type = typename std::iterator_traits::value_type; + using comparator = std::less; + return ::trySort(first, last, comparator()); +} diff --git a/contrib/avro b/contrib/avro index 7832659ec98..2fb8a8a6ec0 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit 7832659ec986075d560f930c288e973c64679552 +Subproject commit 2fb8a8a6ec0eab9109b68abf3b4857e8c476b918 diff --git a/contrib/datasketches-cpp b/contrib/datasketches-cpp index 7abd49bb2e7..c3abaaefe5f 160000 --- a/contrib/datasketches-cpp +++ b/contrib/datasketches-cpp @@ -1 +1 @@ -Subproject commit 7abd49bb2e72bf9a5029993d31dcb1872da88292 +Subproject commit c3abaaefe5fa400eed99e082af07c1b61a7144db diff --git a/contrib/google-protobuf b/contrib/google-protobuf index c47efe2d8f6..2a4fa1a4e95 160000 --- a/contrib/google-protobuf +++ b/contrib/google-protobuf @@ -1 +1 @@ -Subproject commit c47efe2d8f6a60022b49ecd6cc23660687c8598f +Subproject commit 2a4fa1a4e95012d754ac55d43c8bc462dd1c78a8 diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt index 8afb86b25dd..268f0fbe0e4 100644 --- a/contrib/google-protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -36,7 +36,6 @@ set(libprotobuf_lite_files ${protobuf_source_dir}/src/google/protobuf/arenastring.cc ${protobuf_source_dir}/src/google/protobuf/extension_set.cc ${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc - ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven_lite.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc ${protobuf_source_dir}/src/google/protobuf/implicit_weak_message.cc ${protobuf_source_dir}/src/google/protobuf/io/coded_stream.cc @@ -87,13 +86,13 @@ set(libprotobuf_files ${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc - ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc ${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc ${protobuf_source_dir}/src/google/protobuf/io/printer.cc ${protobuf_source_dir}/src/google/protobuf/io/tokenizer.cc ${protobuf_source_dir}/src/google/protobuf/map_field.cc ${protobuf_source_dir}/src/google/protobuf/message.cc ${protobuf_source_dir}/src/google/protobuf/reflection_ops.cc + ${protobuf_source_dir}/src/google/protobuf/repeated_ptr_field.cc ${protobuf_source_dir}/src/google/protobuf/service.cc ${protobuf_source_dir}/src/google/protobuf/source_context.pb.cc ${protobuf_source_dir}/src/google/protobuf/struct.pb.cc @@ -143,21 +142,21 @@ add_library(protobuf::libprotobuf ALIAS _libprotobuf) set(libprotoc_files ${protobuf_source_dir}/src/google/protobuf/compiler/code_generator.cc ${protobuf_source_dir}/src/google/protobuf/compiler/command_line_interface.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_enum.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_enum_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_extension.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_field.cc - 
${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_file.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_generator.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_helpers.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_map_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_message.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_message_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_padding_optimizer.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_parse_function_generator.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_service.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/cpp_string_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/enum.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/enum_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/extension.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/file.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/generator.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/helpers.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/map_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/message.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/message_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/padding_optimizer.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/parse_function_generator.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/primitive_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/service.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/cpp/string_field.cc ${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_doc_comment.cc ${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_enum.cc ${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_enum_field.cc @@ -174,37 +173,35 @@ set(libprotoc_files ${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_repeated_primitive_field.cc ${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_source_generator_base.cc ${protobuf_source_dir}/src/google/protobuf/compiler/csharp/csharp_wrapper_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_context.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_doc_comment.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_enum.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_enum_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_enum_field_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_enum_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_extension.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_extension_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_file.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_generator.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_generator_factory.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_helpers.cc - 
${protobuf_source_dir}/src/google/protobuf/compiler/java/java_kotlin_generator.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_map_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_map_field_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_message.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_message_builder.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_message_builder_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_message_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_message_field_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_message_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_name_resolver.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_primitive_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_primitive_field_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_service.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_shared_code_generator.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_string_field.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/java/java_string_field_lite.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/js/js_generator.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/context.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/doc_comment.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/enum.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/enum_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/enum_field_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/enum_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/extension.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/extension_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/file.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/generator.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/generator_factory.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/helpers.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/kotlin_generator.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/map_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/map_field_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/message.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/message_builder.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/message_builder_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/message_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/message_field_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/message_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/name_resolver.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/primitive_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/primitive_field_lite.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/service.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/shared_code_generator.cc + 
${protobuf_source_dir}/src/google/protobuf/compiler/java/string_field.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/java/string_field_lite.cc ${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_enum.cc ${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_enum_field.cc ${protobuf_source_dir}/src/google/protobuf/compiler/objectivec/objectivec_extension.cc @@ -220,7 +217,9 @@ set(libprotoc_files ${protobuf_source_dir}/src/google/protobuf/compiler/php/php_generator.cc ${protobuf_source_dir}/src/google/protobuf/compiler/plugin.cc ${protobuf_source_dir}/src/google/protobuf/compiler/plugin.pb.cc - ${protobuf_source_dir}/src/google/protobuf/compiler/python/python_generator.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/python/generator.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/python/helpers.cc + ${protobuf_source_dir}/src/google/protobuf/compiler/python/pyi_generator.cc ${protobuf_source_dir}/src/google/protobuf/compiler/ruby/ruby_generator.cc ${protobuf_source_dir}/src/google/protobuf/compiler/subprocess.cc ${protobuf_source_dir}/src/google/protobuf/compiler/zip_writer.cc diff --git a/contrib/grpc b/contrib/grpc index c52656e2bfc..bef8212d1e0 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit c52656e2bfcda3450bd6a7c247d2d9eeb8498524 +Subproject commit bef8212d1e01f99e406c282ceab3d42da08e09ce diff --git a/contrib/orc b/contrib/orc index a20d1d9d7ad..f31c271110a 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6 +Subproject commit f31c271110a2f0dac908a152f11708193ae209ee diff --git a/contrib/pdqsort/pdqsort.h b/contrib/pdqsort/pdqsort.h index 01e82b710ee..cbfc82a4f41 100644 --- a/contrib/pdqsort/pdqsort.h +++ b/contrib/pdqsort/pdqsort.h @@ -54,8 +54,10 @@ namespace pdqsort_detail { block_size = 64, // Cacheline size, assumes power of two. - cacheline_size = 64 + cacheline_size = 64, + /// Try sort allowed iterations + try_sort_iterations = 3, }; #if __cplusplus >= 201103L @@ -501,6 +503,167 @@ namespace pdqsort_detail { leftmost = false; } } + + template + inline bool pdqsort_try_sort_loop(Iter begin, + Iter end, + Compare comp, + size_t bad_allowed, + size_t iterations_allowed, + bool force_sort = false, + bool leftmost = true) { + typedef typename std::iterator_traits::difference_type diff_t; + + // Use a while loop for tail recursion elimination. + while (true) { + if (!force_sort && iterations_allowed == 0) { + return false; + } + + diff_t size = end - begin; + + // Insertion sort is faster for small arrays. + if (size < insertion_sort_threshold) { + if (leftmost) insertion_sort(begin, end, comp); + else unguarded_insertion_sort(begin, end, comp); + + return true; + } + + // Choose pivot as median of 3 or pseudomedian of 9. + diff_t s2 = size / 2; + if (size > ninther_threshold) { + sort3(begin, begin + s2, end - 1, comp); + sort3(begin + 1, begin + (s2 - 1), end - 2, comp); + sort3(begin + 2, begin + (s2 + 1), end - 3, comp); + sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); + std::iter_swap(begin, begin + s2); + } else sort3(begin + s2, begin, end - 1, comp); + + // If *(begin - 1) is the end of the right partition of a previous partition operation + // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our + // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in + // the left partition, greater elements in the right partition. 
We do not have to + // recurse on the left partition, since it's sorted (all equal). + if (!leftmost && !comp(*(begin - 1), *begin)) { + begin = partition_left(begin, end, comp) + 1; + continue; + } + + // Partition and get results. + std::pair part_result = + Branchless ? partition_right_branchless(begin, end, comp) + : partition_right(begin, end, comp); + Iter pivot_pos = part_result.first; + bool already_partitioned = part_result.second; + + // Check for a highly unbalanced partition. + diff_t l_size = pivot_pos - begin; + diff_t r_size = end - (pivot_pos + 1); + bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; + + // If we got a highly unbalanced partition we shuffle elements to break many patterns. + if (highly_unbalanced) { + if (!force_sort) { + return false; + } + + // If we had too many bad partitions, switch to heapsort to guarantee O(n log n). + if (--bad_allowed == 0) { + std::make_heap(begin, end, comp); + std::sort_heap(begin, end, comp); + return true; + } + + if (l_size >= insertion_sort_threshold) { + std::iter_swap(begin, begin + l_size / 4); + std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); + + if (l_size > ninther_threshold) { + std::iter_swap(begin + 1, begin + (l_size / 4 + 1)); + std::iter_swap(begin + 2, begin + (l_size / 4 + 2)); + std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); + std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); + } + } + + if (r_size >= insertion_sort_threshold) { + std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); + std::iter_swap(end - 1, end - r_size / 4); + + if (r_size > ninther_threshold) { + std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); + std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); + std::iter_swap(end - 2, end - (1 + r_size / 4)); + std::iter_swap(end - 3, end - (2 + r_size / 4)); + } + } + } else { + // If we were decently balanced and we tried to sort an already partitioned + // sequence try to use insertion sort. + if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp) + && partial_insertion_sort(pivot_pos + 1, end, comp)) { + return true; + } + } + + // Sort the left partition first using recursion and do tail recursion elimination for + // the right-hand partition. 
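+            // If the left partition sorts successfully within the remaining iteration budget, commit to
+            // finishing the whole range (force_sort), so the right-hand side below is completed even if it
+            // later looks highly unbalanced or the budget runs out; otherwise give up and report failure.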
+ if (pdqsort_try_sort_loop(begin, + pivot_pos, + comp, + bad_allowed, + iterations_allowed - 1, + force_sort, + leftmost)) { + force_sort = true; + } else { + return false; + } + + --iterations_allowed; + begin = pivot_pos + 1; + leftmost = false; + } + + return false; + } + + template + inline bool pdqsort_try_sort_impl(Iter begin, Iter end, Compare comp, size_t bad_allowed) + { + typedef typename std::iterator_traits::difference_type diff_t; + + static constexpr size_t iterations_allowed = pdqsort_detail::try_sort_iterations; + static constexpr size_t num_to_try = 16; + + diff_t size = end - begin; + + if (size > num_to_try * 10) + { + size_t out_of_order_elements = 0; + + for (size_t i = 1; i < num_to_try; ++i) + { + diff_t offset = size / num_to_try; + + diff_t prev_position = offset * (i - 1); + diff_t curr_position = offset * i; + diff_t next_position = offset * (i + 1) - 1; + + bool prev_less_than_curr = comp(*(begin + prev_position), *(begin + curr_position)); + bool curr_less_than_next = comp(*(begin + curr_position), *(begin + next_position)); + if ((prev_less_than_curr && curr_less_than_next) || (!prev_less_than_curr && !curr_less_than_next)) + continue; + + ++out_of_order_elements; + if (out_of_order_elements > iterations_allowed) + return false; + } + } + + return pdqsort_try_sort_loop(begin, end, comp, bad_allowed, iterations_allowed); + } } @@ -538,6 +701,41 @@ inline void pdqsort_branchless(Iter begin, Iter end) { pdqsort_branchless(begin, end, std::less()); } +template +inline bool pdqsort_try_sort(Iter begin, Iter end, Compare comp) { + if (begin == end) return true; + +#if __cplusplus >= 201103L + return pdqsort_detail::pdqsort_try_sort_impl::type>::value && + std::is_arithmetic::value_type>::value>( + begin, end, comp, pdqsort_detail::log2(end - begin)); +#else + return pdqsort_detail::pdqsort_try_sort_impl( + begin, end, comp, pdqsort_detail::log2(end - begin)); +#endif +} + +template +inline bool pdqsort_try_sort(Iter begin, Iter end) { + typedef typename std::iterator_traits::value_type T; + return pdqsort_try_sort(begin, end, std::less()); +} + +template +inline bool pdqsort_try_sort_branchless(Iter begin, Iter end, Compare comp) { + if (begin == end) return true; + + return pdqsort_detail::pdqsort_try_sort_impl( + begin, end, comp, pdqsort_detail::log2(end - begin)); +} + +template +inline bool pdqsort_try_sort_branchless(Iter begin, Iter end) { + typedef typename std::iterator_traits::value_type T; + return pdqsort_try_sort_branchless(begin, end, std::less()); +} + #undef PDQSORT_PREFER_MOVE diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 326f2a751e6..af1ce0c4dd4 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -210,7 +210,7 @@ detach quit " > script.gdb - gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" & + gdb -batch -command script.gdb -p $server_pid & sleep 5 # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: @@ -219,13 +219,12 @@ quit # to freeze and the fuzzer will fail. In debug build it can take a lot of time. 
for _ in {1..180} do - sleep 1 if clickhouse-client --query "select 1" then break fi + sleep 1 done - clickhouse-client --query "select 1" # This checks that the server is responding kill -0 $server_pid # This checks that it is our server that is started and not some other one echo 'Server started and responded' diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 24df1b9c6b1..7d6de732489 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -285,7 +285,7 @@ function run_tests # Use awk because bash doesn't support floating point arithmetic. profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }") - if [ "$(rg -c $(basename $test) changed-test-definitions.txt)" -gt 0 ] + if rg --quiet "$(basename $test)" changed-test-definitions.txt then # Run all queries from changed test files to ensure that all new queries will be tested. max_queries=0 diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index e37fd3b7288..b90513acbad 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -73,7 +73,7 @@ A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause. -Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). +Use the `ORDER BY tuple()` syntax, if you do not need sorting, or set `create_table_empty_primary_key_by_default` to `true` to use the `ORDER BY tuple()` syntax by default. See [Selecting the Primary Key](#selecting-the-primary-key). #### PARTITION BY diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index d2e7ab30478..52fa1689d9d 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -380,7 +380,7 @@ build. ### macOS-only: Install with Homebrew -To install ClickHouse using the popular `brew` package manager, follow the instructions listed in the [ClickHouse Homebrew tap](https://github.com/ClickHouse/homebrew-clickhouse). +To install ClickHouse using [homebrew](https://brew.sh/), see [here](https://formulae.brew.sh/cask/clickhouse). ## Launch {#launch} diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 5c52319b0c9..2dac78bb10e 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1395,6 +1395,23 @@ For more information, see the section [Creating replicated tables](../../engines ``` +## replica_group_name {#replica_group_name} + +Replica group name for database Replicated. + +The cluster created by Replicated database will consist of replicas in the same group. +DDL queries will only wait for the replicas in the same group. + +Empty by default. + +**Example** + +``` xml +backups +``` + +Default value: ``. + ## max_open_files {#max-open-files} The maximum number of open files. 
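For reference, `max_open_files` is a server-level setting configured in the server configuration file, in the same way as `replica_group_name` above. A minimal sketch (the value shown is only an example):

``` xml
<max_open_files>262144</max_open_files>
```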
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 27ac051631a..0a025082860 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2403,6 +2403,17 @@ See also: - [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns) +## optimize_trivial_approximate_count_query {#optimize_trivial_approximate_count_query} + +Use an approximate value for trivial count optimization of storages that support such estimation, for example, EmbeddedRocksDB. + +Possible values: + + - 0 — Optimization disabled. + - 1 — Optimization enabled. + +Default value: `0`. + ## optimize_count_from_files {#optimize_count_from_files} Enables or disables the optimization of counting number of rows from files in different input formats. It applies to table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. @@ -4717,18 +4728,6 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars └─────────────────────┴──────────────────────────┘ ``` -## partial_result_update_duration_ms - -Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET. - -:::note -It's an experimental feature. Enable `allow_experimental_partial_result` setting first to use it. -::: - -## max_rows_in_partial_result - -Maximum rows to show in the partial result after every real-time update while the query runs (use partial result limit + OFFSET as a value in case of OFFSET in the query). - ## validate_tcp_client_information {#validate-tcp-client-information} Determines whether validation of client information enabled when query packet is received from a client using a TCP connection. @@ -4777,3 +4776,18 @@ a Tuple( l Nullable(String) ) ``` + +## dictionary_use_async_executor {#dictionary_use_async_executor} + +Execute a pipeline for reading dictionary source in several threads. It's supported only by dictionaries with local CLICKHOUSE source. + +You may specify it in `SETTINGS` section of dictionary definition: + +```sql +CREATE DICTIONARY t1_dict ( key String, attr UInt64 ) +PRIMARY KEY key +SOURCE(CLICKHOUSE(QUERY `SELECT key, attr FROM t1 GROUP BY key`)) +LIFETIME(MIN 0 MAX 3600) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +SETTINGS(dictionary_use_async_executor=1, max_threads=8); +``` diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 67aa4babb3f..d06d88598a7 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -55,6 +55,7 @@ keeper foo bar - `rmr ` -- Recursively deletes path. 
Confirmation required - `flwc ` -- Executes four-letter-word command - `help` -- Prints this message +- `get_direct_children_number [path]` -- Get numbers of direct children nodes under a specific path - `get_all_children_number [path]` -- Get all numbers of children nodes under a specific path - `get_stat [path]` -- Returns the node's stat (default `.`) - `find_super_nodes [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 73c51a41dfb..40bfb65e4e8 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1081,6 +1081,10 @@ Result: └─────────────────────────────────────────────────────────────┘ ``` +**See also** + +- [arrayFold](#arrayFold) + ## arrayReduceInRanges Applies an aggregate function to array elements in given ranges and returns an array containing the result corresponding to each range. The function will return the same result as multiple `arrayReduce(agg_func, arraySlice(arr1, index, length), ...)`. @@ -1138,17 +1142,41 @@ arrayFold(lambda_function, arr1, arr2, ..., accumulator) Query: ``` sql -SELECT arrayFold( x,acc -> acc + x*2, [1, 2, 3, 4], toInt64(3)) AS res; +SELECT arrayFold( acc,x -> acc + x*2, [1, 2, 3, 4], toInt64(3)) AS res; ``` Result: ``` text -┌─arrayFold(lambda(tuple(x, acc), plus(acc, multiply(x, 2))), [1, 2, 3, 4], toInt64(3))─┐ -│ 3 │ -└───────────────────────────────────────────────────────────────────────────────────────┘ +┌─res─┐ +│ 23 │ +└─────┘ ``` +**Example with the Fibonacci sequence** + +```sql +SELECT arrayFold( acc,x -> (acc.2, acc.2 + acc.1), range(number), (1::Int64, 0::Int64)).1 AS fibonacci +FROM numbers(1,10); + +┌─fibonacci─┐ +│ 0 │ +│ 1 │ +│ 1 │ +│ 2 │ +│ 3 │ +│ 5 │ +│ 8 │ +│ 13 │ +│ 21 │ +│ 34 │ +└───────────┘ +``` + +**See also** + +- [arrayReduce](#arrayReduce) + ## arrayReverse(arr) Returns an array of the same size as the original array containing the elements in reverse order. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 9b6eba9b5f0..3fe791a4fb9 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -605,7 +605,7 @@ The first argument can also be specified as [String](../data-types/string.md) in **Returned value** -- The day of the month (1 - 31) of the given date/time +- The day of the week (1-7), depending on the chosen mode, of the given date/time **Example** @@ -1910,6 +1910,7 @@ Result: ``` **See Also** + - [subDate](#subDate) ## timestamp\_add @@ -2053,6 +2054,7 @@ Result: Alias: `ADDDATE` **See Also** + - [date_add](#date_add) ## subDate @@ -2095,6 +2097,7 @@ Result: Alias: `SUBDATE` **See Also** + - [date_sub](#date_sub) ## now {#now} @@ -2388,42 +2391,50 @@ Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../../sql-ref Accepts an additional, optional `precision` parameter after the `timezone` parameter. -## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters +## addYears, addQuarters, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addMilliseconds, addMicroseconds, addNanoseconds -Function adds a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. 
For example: +These functions add units of the interval specified by the function name to a date, a date with time or a string-encoded date / date with time. A date or date with time is returned. + +Example: ``` sql WITH - toDate('2018-01-01') AS date, - toDateTime('2018-01-01 00:00:00') AS date_time + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string SELECT addYears(date, 1) AS add_years_with_date, - addYears(date_time, 1) AS add_years_with_date_time + addYears(date_time, 1) AS add_years_with_date_time, + addYears(date_time_string, 1) AS add_years_with_date_time_string ``` ``` text -┌─add_years_with_date─┬─add_years_with_date_time─┐ -│ 2019-01-01 │ 2019-01-01 00:00:00 │ -└─────────────────────┴──────────────────────────┘ +┌─add_years_with_date─┬─add_years_with_date_time─┬─add_years_with_date_time_string─┐ +│ 2025-01-01 │ 2025-01-01 00:00:00 │ 2025-01-01 00:00:00.000 │ +└─────────────────────┴──────────────────────────┴─────────────────────────────────┘ ``` -## subtractYears, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractQuarters +## subtractYears, subtractQuarters, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractMilliseconds, subtractMicroseconds, subtractNanoseconds -Function subtract a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. For example: +These functions subtract units of the interval specified by the function name from a date, a date with time or a string-encoded date / date with time. A date or date with time is returned. + +Example: ``` sql WITH - toDate('2019-01-01') AS date, - toDateTime('2019-01-01 00:00:00') AS date_time + toDate('2024-01-01') AS date, + toDateTime('2024-01-01 00:00:00') AS date_time, + '2024-01-01 00:00:00' AS date_time_string SELECT subtractYears(date, 1) AS subtract_years_with_date, - subtractYears(date_time, 1) AS subtract_years_with_date_time + subtractYears(date_time, 1) AS subtract_years_with_date_time, + subtractYears(date_time_string, 1) AS subtract_years_with_date_time_string ``` ``` text -┌─subtract_years_with_date─┬─subtract_years_with_date_time─┐ -│ 2018-01-01 │ 2018-01-01 00:00:00 │ -└──────────────────────────┴───────────────────────────────┘ +┌─subtract_years_with_date─┬─subtract_years_with_date_time─┬─subtract_years_with_date_time_string─┐ +│ 2023-01-01 │ 2023-01-01 00:00:00 │ 2023-01-01 00:00:00.000 │ +└──────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ ``` ## timeSlots(StartTime, Duration,\[, Size\]) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 31d53ba0359..a4c3ba83351 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -4,7 +4,7 @@ sidebar_position: 105 sidebar_label: JSON --- -There are two sets of functions to parse JSON. +There are two sets of functions to parse JSON. - `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. - `JSONExtract*` is made to parse normal JSON. @@ -367,7 +367,7 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); ``` -:::note +:::note Before version 21.11 the order of arguments was wrong, i.e. 
JSON_EXISTS(path, json) ::: @@ -394,7 +394,7 @@ Result: [2] String ``` -:::note +:::note Before version 21.11 the order of arguments was wrong, i.e. JSON_QUERY(path, json) ::: @@ -424,7 +424,7 @@ world String ``` -:::note +:::note Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, json) ::: @@ -509,3 +509,34 @@ SELECT │ ᴺᵁᴸᴸ │ 3 │ └─────────────────────┴────────────────────────────┘ ``` + + +## jsonMergePatch + +Returns the merged JSON object string which is formed by merging multiple JSON objects. + +**Syntax** + +``` sql +jsonMergePatch(json1, json2, ...) +``` + +**Arguments** + +- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. + +**Returned value** + +- If JSON object strings are valid, return the merged JSON object string. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +``` sql +SELECT jsonMergePatch('{"a":1}', '{"name": "joey"}', '{"name": "tom"}', '{"name": "zoey"}') AS res + +┌─res───────────────────┐ +│ {"a":1,"name":"zoey"} │ +└───────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 0209d59b018..25158f8180d 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -5,19 +5,39 @@ sidebar_label: CHECK TABLE title: "CHECK TABLE Statement" --- -Checks if the data in the table is corrupted. +The `CHECK TABLE` query in ClickHouse is used to perform a validation check on a specific table or its partitions. It ensures the integrity of the data by verifying the checksums and other internal data structures. -``` sql -CHECK TABLE [db.]name [PARTITION partition_expr] +Particularly it compares actual file sizes with the expected values which are stored on the server. If the file sizes do not match the stored values, it means the data is corrupted. This can be caused, for example, by a system crash during query execution. + +:::note +The `CHECK TABLE`` query may read all the data in the table and hold some resources, making it resource-intensive. +Consider the potential impact on performance and resource utilization before executing this query. +::: + +## Syntax + +The basic syntax of the query is as follows: + +```sql +CHECK TABLE table_name [PARTITION partition_expression | PART part_name] [FORMAT format] [SETTINGS check_query_single_value_result = (0|1) [, other_settings]] ``` -The `CHECK TABLE` query compares actual file sizes with the expected values which are stored on the server. If the file sizes do not match the stored values, it means the data is corrupted. This can be caused, for example, by a system crash during query execution. +- `table_name`: Specifies the name of the table that you want to check. +- `partition_expression`: (Optional) If you want to check a specific partition of the table, you can use this expression to specify the partition. +- `part_name`: (Optional) If you want to check a specific part in the table, you can add string literal to specify a part name. +- `FORMAT format`: (Optional) Allows you to specify the output format of the result. +- `SETTINGS`: (Optional) Allows additional settings. + - **`check_query_single_value_result`**: (Optional) This setting allows you to toggle between a detailed result (`0`) or a summarized result (`1`). + - Other settings can be applied as well. If you don't require a deterministic order for the results, you can set max_threads to a value greater than one to speed up the query. 
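For instance, a query combining these clauses might look like the following sketch (the table name, partition ID and setting values are placeholders):

```sql
CHECK TABLE test_table PARTITION ID '202401'
FORMAT PrettyCompactMonoBlock
SETTINGS check_query_single_value_result = 0, max_threads = 4
```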
-The query response contains the `result` column with a single row. The row has a value of -[Boolean](../../sql-reference/data-types/boolean.md) type: -- 0 - The data in the table is corrupted. -- 1 - The data maintains integrity. +The query response depends on the value of contains `check_query_single_value_result` setting. +In case of `check_query_single_value_result = 1` only `result` column with a single row is returned. Value inside this row is `1` if the integrity check is passed and `0` if data is corrupted. + +With `check_query_single_value_result = 0` the query returns the following columns: + - `part_path`: Indicates the path to the data part or file name. + - `is_passed`: Returns 1 if the check for this part is successful, 0 otherwise. + - `message`: Any additional messages related to the check, such as errors or success messages. The `CHECK TABLE` query supports the following table engines: @@ -26,30 +46,15 @@ The `CHECK TABLE` query supports the following table engines: - [StripeLog](../../engines/table-engines/log-family/stripelog.md) - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) -Performed over the tables with another table engines causes an exception. +Performed over the tables with another table engines causes an `NOT_IMPLEMETED` exception. Engines from the `*Log` family do not provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner. -## Checking the MergeTree Family Tables +## Examples -For `MergeTree` family engines, if [check_query_single_value_result](../../operations/settings/settings.md#check_query_single_value_result) = 0, the `CHECK TABLE` query shows a check status for every individual data part of a table on the local server. +By default `CHECK TABLE` query shows the general table check status: ```sql -SET check_query_single_value_result = 0; -CHECK TABLE test_table; -``` - -```text -┌─part_path─┬─is_passed─┬─message─┐ -│ all_1_4_1 │ 1 │ │ -│ all_1_4_2 │ 1 │ │ -└───────────┴───────────┴─────────┘ -``` - -If `check_query_single_value_result` = 1, the `CHECK TABLE` query shows the general table check status. - -```sql -SET check_query_single_value_result = 1; CHECK TABLE test_table; ``` @@ -59,11 +64,86 @@ CHECK TABLE test_table; └────────┘ ``` +If you want to see the check status for every individual data part you may use `check_query_single_value_result` setting. + +Also, to check a specific partition of the table, you can use the `PARTITION` keyword. + +```sql +CHECK TABLE t0 PARTITION ID '201003' +FORMAT PrettyCompactMonoBlock +SETTINGS check_query_single_value_result = 0 +``` + +Output: + +```text +┌─part_path────┬─is_passed─┬─message─┐ +│ 201003_7_7_0 │ 1 │ │ +│ 201003_3_3_0 │ 1 │ │ +└──────────────┴───────────┴─────────┘ +``` + +Similarly, you can check a specific part of the table by using the `PART` keyword. + +```sql +CHECK TABLE t0 PART '201003_7_7_0' +FORMAT PrettyCompactMonoBlock +SETTINGS check_query_single_value_result = 0 +``` + +Output: + +```text +┌─part_path────┬─is_passed─┬─message─┐ +│ 201003_7_7_0 │ 1 │ │ +└──────────────┴───────────┴─────────┘ +``` + +Note that when part does not exist, the query returns an error: + +```sql +CHECK TABLE t0 PART '201003_111_222_0' +``` + +```text +DB::Exception: No such data part '201003_111_222_0' to check in table 'default.t0'. 
(NO_SUCH_DATA_PART) +``` + +### Receiving a 'Corrupted' Result + +:::warning +Disclaimer: The procedure described here, including the manual manipulating or removing files directly from the data directory, is for experimental or development environments only. Do **not** attempt this on a production server, as it may lead to data loss or other unintended consequences. +::: + +Remove the existing checksum file: + +```bash +rm /var/lib/clickhouse-server/data/default/t0/201003_3_3_0/checksums.txt +``` + +```sql +CHECK TABLE t0 PARTITION ID '201003' +FORMAT PrettyCompactMonoBlock +SETTINGS check_query_single_value_result = 0 + + +Output: + +```text +┌─part_path────┬─is_passed─┬─message──────────────────────────────────┐ +│ 201003_7_7_0 │ 1 │ │ +│ 201003_3_3_0 │ 1 │ Checksums recounted and written to disk. │ +└──────────────┴───────────┴──────────────────────────────────────────┘ +``` + +If the checksums.txt file is missing, it can be restored. It will be recalculated and rewritten during the execution of the CHECK TABLE command for the specific partition, and the status will still be reported as 'success.'" + + ## If the Data Is Corrupted If the table is corrupted, you can copy the non-corrupted data to another table. To do this: 1. Create a new table with the same structure as damaged table. To do this execute the query `CREATE TABLE AS `. -2. Set the [max_threads](../../operations/settings/settings.md#settings-max_threads) value to 1 to process the next query in a single thread. To do this run the query `SET max_threads = 1`. +2. Set the `max_threads` value to 1 to process the next query in a single thread. To do this run the query `SET max_threads = 1`. 3. Execute the query `INSERT INTO SELECT * FROM `. This request copies the non-corrupted data from the damaged table to another table. Only the data before the corrupted part will be copied. 4. Restart the `clickhouse-client` to reset the `max_threads` value. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 11026340a0f..2a8d6788889 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -62,7 +62,7 @@ Materialized views store data transformed by the corresponding [SELECT](../../.. When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data. -When creating a materialized view with `TO [db].[table]`, you must not use `POPULATE`. +When creating a materialized view with `TO [db].[table]`, you can't also use `POPULATE`. A materialized view is implemented as follows: when inserting data to the table specified in `SELECT`, part of the inserted data is converted by this `SELECT` query, and the result is inserted in the view. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 38922e964b8..281a1d0436c 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -334,6 +334,7 @@ For multiple `JOIN` clauses in a single `SELECT` query: - Taking all the columns via `*` is available only if tables are joined, not subqueries. - The `PREWHERE` clause is not available. +- The `USING` clause is not available. 
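A minimal sketch of what this implies (table and column names are illustrative): with several joins in one query, each condition is written out with `ON` rather than `USING`:

``` sql
SELECT t1.id, t2.name, t3.amount
FROM t1
JOIN t2 ON t1.id = t2.id
JOIN t3 ON t1.id = t3.id
```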
For `ON`, `WHERE`, and `GROUP BY` clauses: diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 21bd674bd24..d19bf05aca8 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -473,7 +473,7 @@ Shows all [users](../../guides/sre/user-management/index.md#user-account-managem ``` sql SHOW ACCESS ``` -## SHOW CLUSTER(s) +## SHOW CLUSTER(S) Returns a list of clusters. All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table. @@ -609,6 +609,18 @@ Result: └──────────────────┴────────┴─────────────┘ ``` +## SHOW SETTING + +``` sql +SHOW SETTING +``` + +Outputs setting value for specified setting name. + +**See Also** +- [system.settings](../../operations/system-tables/settings.md) table + + ## SHOW FILESYSTEM CACHES ```sql @@ -651,3 +663,47 @@ If either `LIKE` or `ILIKE` clause is specified, the query returns a list of sys **See Also** - [system.functions](../../operations/system-tables/functions.md) table + +## SHOW MERGES + +Returns a list of merges. All merges are listed in the [system.merges](../../operations/system-tables/merges.md) table. + + +**Syntax** + +``` sql +SHOW MERGES [[NOT] LIKE|ILIKE ''] [LIMIT ] +``` + +**Examples** + +Query: + +``` sql +SHOW MERGES; +``` + +Result: + +```text +┌─table──────┬─database─┬─estimate_complete─┬─────elapsed─┬─progress─┬─is_mutation─┬─size─────┬─mem───────┐ +│ your_table │ default │ 0.14 │ 0.365592338 │ 0.73 │ 0 │ 5.40 MiB │ 10.25 MiB │ +└────────────┴──────────┴───────────────────┴─────────────┴──────────┴─────────────┴────────────┴─────────┘ + +``` + +Query: + +``` sql +SHOW MERGES LIKE 'your_t%' LIMIT 1; +``` + +Result: + +```text +┌─table──────┬─database─┬─estimate_complete─┬─────elapsed─┬─progress─┬─is_mutation─┬─size─────┬─mem───────┐ +│ your_table │ default │ 0.05 │ 1.727629065 │ 0.97 │ 0 │ 5.40 MiB │ 10.25 MiB │ +└────────────┴──────────┴───────────────────┴─────────────┴──────────┴─────────────┴────────────┴─────────┘ + +``` + diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md index 1a6fbade06f..9c4a503a276 100644 --- a/docs/ru/development/architecture.md +++ b/docs/ru/development/architecture.md @@ -6,21 +6,21 @@ sidebar_label: "Обзор архитектуры ClickHouse" # Обзор архитектуры ClickHouse {#overview-of-clickhouse-architecture} -ClickHouse - полноценная колоночная СУБД. Данные хранятся в колонках, а в процессе обработки - в массивах (векторах или фрагментах (chunk’ах) колонок). По возможности операции выполняются на массивах, а не на индивидуальных значениях. Это называется “векторизованное выполнения запросов” (vectorized query execution), и помогает снизить стоимость фактической обработки данных. +ClickHouse — полноценная столбцовая СУБД. Данные хранятся в столбцах, а в процессе обработки — в массивах (векторах или фрагментах столбцов — chunks). По возможности операции выполняются на массивах, а не на индивидуальных значениях. Это называется “векторизованное выполнения запросов” (vectorized query execution), и помогает снизить стоимость фактической обработки данных. -> Эта идея не нова. Такой подход использовался в `APL` (A programming language, 1957) и его потомках: `A +` (диалект `APL`), `J` (1990), `K` (1993) и `Q` (язык программирования Kx Systems, 2003). Программирование на массивах (Array programming) используется в научных вычислительных системах. 
Эта идея не является чем-то новым и для реляционных баз данных: например, она используется в системе `VectorWise` (так же известной как Actian Vector Analytic Database от Actian Corporation). +> Эта идея не нова. Такой подход использовался в языке `APL` (A programming language, 1957) и его потомках: `A +` (диалект `APL`), `J` (1990), `K` (1993) и `Q` (язык программирования Kx Systems, 2003). Программирование на массивах (array programming) используется в научных вычислительных системах. Эта идея не является чем-то новым и для реляционных баз данных: например, она используется в системе `VectorWise` (так же известной как Actian Vector Analytic Database от Actian Corporation). -Существует два различных подхода для увеличения скорости обработки запросов: выполнение векторизованного запроса и генерация кода во время выполнения (runtime code generation). В последнем случае код генерируется на лету для каждого типа запроса, удаляя все косвенные и динамические обращения. Ни один из этих подходов не имеет явного преимущества. Генерация кода во время выполнения выигрывает, если объединяет большое число операций, таким образом полностью используя вычислительные блоки и конвейер CPU. Выполнение векторизованного запроса может быть менее практично потому, что задействует временные векторы данных, которые должны быть записаны и прочитаны из кэша. Если временные данные не помещаются в L2 кэш, будут проблемы. С другой стороны выполнение векторизованного запроса упрощает использование SIMD инструкций CPU. [Научная работа](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf) наших друзей показывает преимущества сочетания обоих подходов. ClickHouse использует выполнение векторизованного запроса и имеет ограниченную начальную поддержку генерации кода во время выполнения. +Существует два различных подхода для увеличения скорости обработки запросов: выполнение векторизованного запроса и генерация кода во время выполнения (runtime code generation). В последнем случае код генерируется на лету для каждого типа запроса, и удаляются все косвенные и динамические обращения. Ни один из этих подходов не имеет явного преимущества. Генерация кода во время выполнения выигрывает, если объединяет большое число операций, таким образом полностью используя вычислительные блоки и конвейер CPU. Выполнение векторизованного запроса может быть менее практично потому, что задействует временные векторы данных, которые должны быть записаны и прочитаны из кэша. Если временные данные не помещаются в L2-кэш, будут проблемы. С другой стороны выполнение векторизованного запроса упрощает использование SIMD-инструкций CPU. [Научная работа](http://15721.courses.cs.cmu.edu/spring2016/papers/p5-sompolski.pdf) наших друзей показывает преимущества сочетания обоих подходов. ClickHouse использует выполнение векторизованного запроса и имеет ограниченную начальную поддержку генерации кода во время выполнения. -## Колонки {#columns} +## Столбцы {#columns} -Для представления столбцов в памяти (фактически, фрагментов столбцов) используется интерфейс `IColumn`. Интерфейс предоставляет вспомогательные методы для реализации различных реляционных операторов. Почти все операции иммутабельные: они не изменяют оригинальных колонок, а создают новую с измененными значениями. Например, метод `IColumn :: filter` принимает фильтр - набор байт. Он используется для реляционных операторов `WHERE` и `HAVING`. Другой пример: метод `IColumn :: permute` используется для поддержки `ORDER BY`, метод `IColumn :: cut` - `LIMIT` и т. д. 
+Для представления столбцов в памяти (фактически, фрагментов столбцов) используется интерфейс `IColumn`. Интерфейс предоставляет вспомогательные методы для реализации различных реляционных операторов. Почти все операции не изменяют данные (immutable): они не изменяют содержимое столбцов, а создают новые с изменёнными значениями. Например, метод `IColumn :: filter` принимает фильтр — набор байтов. Он используется для реляционных операторов `WHERE` и `HAVING`. Другой пример: метод `IColumn :: permute` используется для поддержки `ORDER BY`, метод `IColumn :: cut` — `LIMIT` и т. д. -Различные реализации `IColumn` (`ColumnUInt8`, `ColumnString` и т. д.) отвечают за распределение данных колонки в памяти. Для колонок целочисленного типа это один смежный массив, такой как `std :: vector`. Для колонок типа `String` и `Array` это два вектора: один для всех элементов массивов, располагающихся смежно, второй для хранения смещения до начала каждого массива. Также существует реализация `ColumnConst`, в которой хранится только одно значение в памяти, но выглядит как колонка. +Различные реализации `IColumn` (`ColumnUInt8`, `ColumnString` и т. д.) отвечают за распределение данных столбца в памяти. Для столбцов целочисленного типа — это один смежный массив, такой как `std :: vector`. Для столбцов типа `String` и `Array` — это два вектора: один для всех элементов массивов, располагающихся смежно, второй для хранения смещения до начала каждого массива. Также существует реализация `ColumnConst`, в которой хранится только одно значение в памяти, но выглядит как столбец. ## Поля {#field} -Тем не менее, можно работать и с индивидуальными значениями. Для представления индивидуальных значений используется `Поле` (`Field`). `Field` - размеченное объединение `UInt64`, `Int64`, `Float64`, `String` и `Array`. `IColumn` имеет метод `оператор []` для получения значения по индексу n как `Field`, а также метод insert для добавления `Field` в конец колонки. Эти методы не очень эффективны, так как требуют временных объектов `Field`, представляющих индивидуальное значение. Есть более эффективные методы, такие как `insertFrom`, `insertRangeFrom` и т.д. +Тем не менее, можно работать и с индивидуальными значениями. Для представления индивидуальных значений используется `Поле` (`Field`). `Field` — размеченное объединение `UInt64`, `Int64`, `Float64`, `String` и `Array`. `IColumn` имеет метод `оператор []` для получения значения по индексу n как `Field`, а также метод insert для добавления `Field` в конец колонки. Эти методы не очень эффективны, так как требуют временных объектов `Field`, представляющих индивидуальное значение. Есть более эффективные методы, такие как `insertFrom`, `insertRangeFrom` и т.д. `Field` не несет в себе достаточно информации о конкретном типе данных в таблице. Например, `UInt8`, `UInt16`, `UInt32` и `UInt64` в `Field` представлены как `UInt64`. @@ -28,12 +28,12 @@ ClickHouse - полноценная колоночная СУБД. Данные `IColumn` предоставляет методы для общих реляционных преобразований данных, но они не отвечают всем потребностям. Например, `ColumnUInt64` не имеет метода для вычисления суммы двух столбцов, а `ColumnString` не имеет метода для запуска поиска по подстроке. Эти бесчисленные процедуры реализованы вне `IColumn`. -Различные функции на колонках могут быть реализованы обобщенным, неэффективным путем, используя `IColumn` методы для извлечения значений `Field`, или специальным путем, используя знания о внутреннем распределение данных в памяти в конкретной реализации `IColumn`. 
Для этого функции приводятся к конкретному типу `IColumn` и работают напрямую с его внутренним представлением. Например, в `ColumnUInt64` есть метод `getData`, который возвращает ссылку на внутренний массив, чтение и заполнение которого, выполняется отдельной процедурой напрямую. Фактически, мы имеем "дырявые абстракции", обеспечивающие эффективные специализации различных процедур. +Различные функции на столбцах могут быть реализованы обобщённым, неэффективным путем, используя `IColumn`-методы для извлечения значений `Field`, или специальным путем, используя знания о внутреннем распределение данных в памяти в конкретной реализации `IColumn`. Для этого функции приводятся к конкретному типу `IColumn` и работают напрямую с его внутренним представлением. Например, в `ColumnUInt64` есть метод `getData`, который возвращает ссылку на внутренний массив, чтение и заполнение которого, выполняется отдельной процедурой напрямую. Фактически, мы имеем “дырявые абстракции”, обеспечивающие эффективные специализации различных процедур. ## Типы данных (Data Types) {#data_types} -`IDataType` отвечает за сериализацию и десериализацию - чтение и запись фрагментов колонок или индивидуальных значений в бинарном или текстовом формате. -`IDataType` точно соответствует типам данных в таблицах - `DataTypeUInt32`, `DataTypeDateTime`, `DataTypeString` и т. д. +`IDataType` отвечает за сериализацию и десериализацию — чтение и запись фрагментов столбцов или индивидуальных значений в бинарном или текстовом формате. +`IDataType` точно соответствует типам данных в таблицах — `DataTypeUInt32`, `DataTypeDateTime`, `DataTypeString` и т. д. `IDataType` и `IColumn` слабо связаны друг с другом. Различные типы данных могут быть представлены в памяти с помощью одной реализации `IColumn`. Например, и `DataTypeUInt32`, и `DataTypeDateTime` в памяти представлены как `ColumnUInt32` или `ColumnConstUInt32`. В добавок к этому, один тип данных может быть представлен различными реализациями `IColumn`. Например, `DataTypeUInt8` может быть представлен как `ColumnUInt8` и `ColumnConstUInt8`. @@ -43,11 +43,11 @@ ClickHouse - полноценная колоночная СУБД. Данные ## Блоки (Block) {#block} -`Block` это контейнер, который представляет фрагмент (chunk) таблицы в памяти. Это набор троек - `(IColumn, IDataType, имя колонки)`. В процессе выполнения запроса, данные обрабатываются `Block`-ами. Если у нас есть `Block`, значит у нас есть данные (в объекте `IColumn`), информация о типе (в `IDataType`), которая говорит нам, как работать с колонкой, и имя колонки (оригинальное имя колонки таблицы или служебное имя, присвоенное для получения промежуточных результатов вычислений). +`Block` — это контейнер, который представляет фрагмент (chunk) таблицы в памяти. Это набор троек — `(IColumn, IDataType, имя столбца)`. В процессе выполнения запроса, данные обрабатываются блоками (`Block`). Если есть `Block`, значит у нас есть данные (в объекте `IColumn`), информация о типе (в `IDataType`), которая говорит, как работать со столбцов, и имя столбца (оригинальное имя столбца таблицы или служебное имя, присвоенное для получения промежуточных результатов вычислений). -При вычислении некоторой функции на колонках в блоке мы добавляем еще одну колонку с результатами в блок, не трогая колонки аргументов функции, потому что операции иммутабельные. Позже ненужные колонки могут быть удалены из блока, но не модифицированы. Это удобно для устранения общих подвыражений. 
+При вычислении некоторой функции на столбцах в блоке добавляется ещё один столбец с результатами в блок, не трогая колонки аргументов функции, потому что операции иммутабельные. Позже ненужные столбцы могут быть удалены из блока, но не модифицированы. Это удобно для устранения общих подвыражений. -Блоки создаются для всех обработанных фрагментов данных. Напоминаем, что одни и те же типы вычислений, имена колонок и типы переиспользуются в разных блоках и только данные колонок изменяются. Лучше разделить данные и заголовок блока потому, что в блоках маленького размера мы имеем большой оверхэд по временным строкам при копировании умных указателей (`shared_ptrs`) и имен колонок. +Блоки создаются для всех обработанных фрагментов данных. Напоминаем, что одни и те же типы вычислений, имена столбцов и типы переиспользуются в разных блоках и только данные колонок изменяются. Лучше разделить данные и заголовок блока потому, что в блоках маленького размера мы имеем большой оверхэд по временным строкам при копировании умных указателей (`shared_ptrs`) и имен столбцов. ## Потоки блоков (Block Streams) {#block-streams} @@ -73,13 +73,13 @@ ClickHouse - полноценная колоночная СУБД. Данные ## I/O {#io} -Для байт-ориентированных ввода/вывода существуют абстрактные классы `ReadBuffer` и `WriteBuffer`. Они используются вместо C++ `iostream`. Не волнуйтесь: каждый зрелый проект C++ использует что-то другое вместо `iostream` по уважительным причинам. +Для байт-ориентированного ввода-вывода существуют абстрактные классы `ReadBuffer` и `WriteBuffer`. Они используются вместо `iostream`. Не волнуйтесь: каждый зрелый проект C++ использует что-то другое вместо `iostream` по уважительным причинам. `ReadBuffer` и `WriteBuffer` это просто непрерывный буфер и курсор, указывающий на позицию в этом буфере. Реализации могут как владеть так и не владеть памятью буфера. Существует виртуальный метод заполнения буфера следующими данными (для `ReadBuffer`) или сброса буфера куда-нибудь (например `WriteBuffer`). Виртуальные методы редко вызываются. Реализации `ReadBuffer`/`WriteBuffer` используются для работы с файлами и файловыми дескрипторами, а также сетевыми сокетами, для реализации сжатия (`CompressedWriteBuffer` инициализируется вместе с другим `WriteBuffer` и осуществляет сжатие данных перед записью в него), и для других целей – названия `ConcatReadBuffer`, `LimitReadBuffer`, и `HashingWriteBuffer` говорят сами за себя. -Буферы чтения/записи имеют дело только с байтами. В заголовочных файлах `ReadHelpers` и `WriteHelpers` объявлены некоторые функции, чтобы помочь с форматированием ввода/вывода. Например, есть помощники для записи числа в десятичном формате. +Буферы чтения-записи имеют дело только с байтами. В заголовочных файлах `ReadHelpers` и `WriteHelpers` объявлены некоторые функции, чтобы помочь с форматированием ввода-вывода. Например, есть помощники для записи числа в десятичном формате. Давайте посмотрим, что происходит, когда вы хотите вывести результат в `JSON` формате в стандартный вывод (stdout). У вас есть результирующий набор данных, готовый к извлечению из `IBlockInputStream`. Вы создаете `WriteBufferFromFileDescriptor(STDOUT_FILENO)` чтобы записать байты в stdout. Вы создаете `JSONRowOutputStream`, инициализируете с этим `WriteBuffer`'ом, чтобы записать строки `JSON` в stdout. Кроме того вы создаете `BlockOutputStreamFromRowOutputStream`, реализуя `IBlockOutputStream`. Затем вызывается `copyData` для передачи данных из `IBlockInputStream` в `IBlockOutputStream` и все работает. 
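To make the buffer-and-cursor idea and the output chain described above concrete, here is a deliberately simplified, self-contained C++ sketch. All class names (`OutBuffer`, `FDOutBuffer`, `JSONRowWriter`) are invented for illustration and are not the real `WriteBuffer`, `WriteBufferFromFileDescriptor`, or `JSONRowOutputStream` classes; the point is only to show why the virtual flush method is called rarely and how a row-oriented format writes through whatever buffer it is given.

```cpp
// Simplified model, not the actual ClickHouse I/O classes: a contiguous buffer
// plus a cursor, with a single virtual method that dumps the buffer somewhere,
// and a toy JSON row writer layered on top of it.
#include <algorithm>
#include <cstring>
#include <string>
#include <unistd.h>
#include <vector>

class OutBuffer
{
public:
    explicit OutBuffer(size_t capacity) : memory(capacity) {}
    virtual ~OutBuffer() = default;

    void write(const char * data, size_t len)
    {
        while (len > 0)
        {
            if (pos == memory.size())
                flush();                                   /// the virtual call is rare: once per filled buffer
            size_t chunk = std::min(len, memory.size() - pos);
            std::memcpy(memory.data() + pos, data, chunk);
            pos += chunk;
            data += chunk;
            len -= chunk;
        }
    }

    void write(const std::string & s) { write(s.data(), s.size()); }

    void flush()
    {
        flushImpl();                                       /// "dump the accumulated bytes somewhere"
        pos = 0;
    }

protected:
    virtual void flushImpl() = 0;

    std::vector<char> memory;
    size_t pos = 0;
};

/// One concrete sink: a file descriptor such as stdout.
class FDOutBuffer : public OutBuffer
{
public:
    explicit FDOutBuffer(int fd_, size_t capacity = 4096) : OutBuffer(capacity), fd(fd_) {}
    ~FDOutBuffer() override { flush(); }

private:
    void flushImpl() override
    {
        ssize_t written = ::write(fd, memory.data(), pos);
        (void)written;                                     /// error handling omitted in this sketch
    }

    int fd;
};

/// A toy row-oriented output format: it only knows how to frame a row as JSON text
/// and push the bytes into the buffer it was given.
struct JSONRowWriter
{
    explicit JSONRowWriter(OutBuffer & out_) : out(out_) {}

    void writeRow(const std::string & name, int value)
    {
        out.write("{\"" + name + "\": " + std::to_string(value) + "}\n");
    }

    OutBuffer & out;
};

int main()
{
    FDOutBuffer out(STDOUT_FILENO);                        /// analogous to writing the result to stdout
    JSONRowWriter rows(out);
    for (int i = 0; i < 3; ++i)                            /// stands in for "copy data from input to output"
        rows.writeRow("x", i);
}
```

The same layering is what lets a compressing buffer (the document's `CompressedWriteBuffer`, which wraps another `WriteBuffer`) or a socket buffer be substituted without the row format needing to know where the bytes end up.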
Внутренний `JSONRowOutputStream` будет писать в формате `JSON` различные разделители и вызвать `IDataType::serializeTextJSON` метод со ссылкой на `IColumn` и номер строки в качестве аргументов. Следовательно, `IDataType::serializeTextJSON` вызовет метод из `WriteHelpers.h`: например, `writeText` для числовых типов и `writeJSONString` для `DataTypeString`. @@ -93,7 +93,7 @@ ClickHouse - полноценная колоночная СУБД. Данные Но есть и заметные исключения: -- AST запрос, передающийся в метод `read`, может использоваться движком таблицы для получения информации о возможности использования индекса и считывания меньшего количества данных из таблицы. +- AST-запрос, передающийся в метод `read`, может использоваться движком таблицы для получения информации о возможности использования индекса и считывания меньшего количества данных из таблицы. - Иногда движок таблиц может сам обрабатывать данные до определенного этапа. Например, `StorageDistributed` можно отправить запрос на удаленные серверы, попросить их обработать данные до этапа, когда данные с разных удаленных серверов могут быть объединены, и вернуть эти предварительно обработанные данные. Затем интерпретатор запросов завершает обработку данных. Метод `read` может возвращать несколько объектов `IBlockInputStream`, позволяя осуществлять параллельную обработку данных. Эти несколько блочных входных потоков могут считываться из таблицы параллельно. Затем вы можете обернуть эти потоки различными преобразованиями (такими как вычисление выражений или фильтрация), которые могут быть вычислены независимо, и создать `UnionBlockInputStream` поверх них, чтобы читать из нескольких потоков параллельно. @@ -104,11 +104,11 @@ ClickHouse - полноценная колоночная СУБД. Данные > В качестве результата выполнения метода `read`, `IStorage` возвращает `QueryProcessingStage` – информацию о том, какие части запроса были обработаны внутри хранилища. -## Парсеры (Parsers) {#parsers} +## Разборщики (Parsers) {#parsers} -Написанный от руки парсер, анализирующий запрос, работает по методу рекурсивного спуска. Например, `ParserSelectQuery` просто рекурсивно вызывает нижестоящие парсеры для различных частей запроса. Парсеры создают абстрактное синтаксическое дерево (`AST`). `AST` представлен узлами, которые являются экземплярами `IAST`. +Написанный от руки разборщик, анализирующий запрос, работает по методу рекурсивного спуска. Например, `ParserSelectQuery` просто рекурсивно вызывает нижестоящие разборщики для различных частей запроса. Разборщики создают абстрактное синтаксическое дерево (`AST`). `AST` представлен узлами, которые являются экземплярами `IAST`. -> Генераторы парсеров не используются по историческим причинам. +> Генераторы разборщиков не используются по историческим причинам. ## Интерпретаторы {#interpreters} @@ -134,7 +134,7 @@ ClickHouse имеет сильную типизацию, поэтому нет ## Агрегатные функции {#aggregate-functions} -Агрегатные функции - это функции с состоянием (stateful). Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь одна переменная типа `UInt64`) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`). +Агрегатные функции — это функции с состоянием (stateful). 
Они накапливают переданные значения в некотором состоянии и позволяют получать результаты из этого состояния. Работа с ними осуществляется с помощью интерфейса `IAggregateFunction`. Состояния могут быть как простыми (состояние для `AggregateFunctionCount` это всего лишь одна переменная типа `UInt64`) так и довольно сложными (состояние `AggregateFunctionUniqCombined` представляет собой комбинацию линейного массива, хэш-таблицы и вероятностной структуры данных `HyperLogLog`). Состояния распределяются в `Arena` (пул памяти) для работы с несколькими состояниями при выполнении запроса `GROUP BY` высокой кардинальности (большим числом уникальных данных). Состояния могут иметь нетривиальный конструктор и деструктор: например, сложные агрегатные состояния могут сами аллоцировать дополнительную память. Потому к созданию и уничтожению состояний, правильной передаче владения и порядку уничтожения следует уделять больше внимание. @@ -146,18 +146,18 @@ ClickHouse имеет сильную типизацию, поэтому нет Сервер предоставляет несколько различных интерфейсов. -- HTTP интерфейс для любых сторонних клиентов. -- TCP интерфейс для родного ClickHouse клиента и межсерверной взаимодействия при выполнении распределенных запросов. +- HTTP-интерфейс для любых сторонних клиентов. +- TCP-интерфейс для родного ClickHouse-клиента и межсерверной взаимодействия при выполнении распределенных запросов. - Интерфейс для передачи данных при репликации. -Внутри простой многопоточный сервер без корутин (coroutines), файберов (fibers) и т.д. Поскольку сервер не предназначен для обработки большого количества простых запросов, а ориентирован на обработку сложных запросов относительно низкой интенсивности, каждый из потоков может обрабатывать огромное количество аналитических запросов. +Внутри простой многопоточный сервер без сопрограмм (coroutines), фиберов (fibers) и т. д. Поскольку сервер не предназначен для обработки большого количества простых запросов, а ориентирован на обработку сложных запросов относительно низкой интенсивности, каждый из потоков может обрабатывать огромное количество аналитических запросов. -Сервер инициализирует класс `Context`, где хранит необходимое для выполнения запроса окружение: список доступных баз данных, пользователей и прав доступа, настройки, кластеры, список процессов, журнал запросов и т.д. Это окружение используется интерпретаторами. +Сервер инициализирует класс `Context`, где хранит необходимое для выполнения запроса окружение: список доступных баз данных, пользователей и прав доступа, настройки, кластеры, список процессов, журнал запросов и т. д. Это окружение используется интерпретаторами. -Мы поддерживаем полную обратную и прямую совместимость для TCP интерфейса: старые клиенты могут общаться с новыми серверами, а новые клиенты могут общаться со старыми серверами. Но мы не хотим поддерживать его вечно и прекращаем поддержку старых версий примерно через год. +Мы поддерживаем полную обратную и прямую совместимость для TCP-интерфейса: старые клиенты могут общаться с новыми серверами, а новые клиенты могут общаться со старыми серверами. Но мы не хотим поддерживать его вечно и прекращаем поддержку старых версий примерно через год. :::note Примечание -Для всех сторонних приложений мы рекомендуем использовать HTTP интерфейс, потому что он прост и удобен в использовании. TCP интерфейс тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных и использует специальное кадрирование для сжатых данных. 
Мы не выпустили библиотеку C для этого протокола, потому что потребовала бы линковки большей части кодовой базы ClickHouse, что непрактично. +Для всех сторонних приложений мы рекомендуем использовать HTTP-интерфейс, потому что он прост и удобен в использовании. TCP-интерфейс тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных и использует специальное кадрирование для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что потребовала бы линковки большей части кодовой базы ClickHouse, что непрактично. ::: ## Выполнение распределенных запросов (Distributed Query Execution) {#distributed-query-execution} @@ -169,15 +169,15 @@ ClickHouse имеет сильную типизацию, поэтому нет ## Merge Tree {#merge-tree} -`MergeTree` - это семейство движков хранения, поддерживающих индексацию по первичному ключу. Первичный ключ может быть произвольным набором (кортежем) столбцов или выражений. Данные в таблице `MergeTree` хранятся "частями" (“parts”). Каждая часть хранит данные отсортированные по первичному ключу (данные упорядочены лексикографически). Все столбцы таблицы хранятся в отдельных файлах `column.bin` в этих частях. Файлы состоят из сжатых блоков. Каждый блок обычно содержит от 64 КБ до 1 МБ несжатых данных, в зависимости от среднего значения размера данных. Блоки состоят из значений столбцов, расположенных последовательно один за другим. Значения столбцов находятся в одинаковом порядке для каждого столбца (порядок определяется первичным ключом), поэтому, когда вы выполняете итерацию по многим столбцам, вы получаете значения для соответствующих строк. +`MergeTree` — это семейство движков хранения, поддерживающих индексацию по первичному ключу. Первичный ключ может быть произвольным набором (кортежем) столбцов или выражений. Данные в таблице `MergeTree` хранятся "частями" (“parts”). Каждая часть хранит данные отсортированные по первичному ключу (данные упорядочены лексикографически). Все столбцы таблицы хранятся в отдельных файлах `column.bin` в этих частях. Файлы состоят из сжатых блоков. Каждый блок обычно содержит от 64 КБ до 1 МБ несжатых данных, в зависимости от среднего значения размера данных. Блоки состоят из значений столбцов, расположенных последовательно один за другим. Значения столбцов находятся в одинаковом порядке для каждого столбца (порядок определяется первичным ключом), поэтому, когда вы выполняете итерацию по многим столбцам, вы получаете значения для соответствующих строк. -Сам первичный ключ является "разреженным" ("sparse"). Он не относится к каждой отдельной строке, а только к некоторым диапазонам данных. Отдельный файл «primary.idx» имеет значение первичного ключа для каждой N-й строки, где N называется гранулярностью индекса ("index_granularity", обычно N = 8192). Также для каждого столбца у нас есть файлы `column.mrk` с "метками" ("marks"), которые обозначают смещение для каждой N-й строки в файле данных. Каждая метка представляет собой пару: смещение начала сжатого блока от начала файла и смещение к началу данных в распакованном блоке. Обычно сжатые блоки выравниваются по меткам, а смещение в распакованном блоке равно нулю. Данные для `primary.idx` всегда находятся в памяти, а данные для файлов `column.mrk` кэшируются. +Сам первичный ключ является “разреженным” (sparse). Он не относится к каждой отдельной строке, а только к некоторым диапазонам данных. 
Отдельный файл «primary.idx» имеет значение первичного ключа для каждой N-й строки, где N называется гранулярностью индекса (index_granularity, обычно N = 8192). Также для каждого столбца у нас есть файлы `column.mrk` с "метками" ("marks"), которые обозначают смещение для каждой N-й строки в файле данных. Каждая метка представляет собой пару: смещение начала сжатого блока от начала файла и смещение к началу данных в распакованном блоке. Обычно сжатые блоки выравниваются по меткам, а смещение в распакованном блоке равно нулю. Данные для `primary.idx` всегда находятся в памяти, а данные для файлов `column.mrk` кэшируются. Когда мы собираемся читать что-то из части данных `MergeTree`, мы смотрим содержимое `primary.idx` и определяем диапазоны, которые могут содержать запрошенные данные, затем просматриваем содержимое `column.mrk` и вычисляем смещение, чтобы начать чтение этих диапазонов. Из-за разреженности могут быть прочитаны лишние данные. ClickHouse не подходит для простых точечных запросов высокой интенсивности, потому что весь диапазон строк размером `index_granularity` должен быть прочитан для каждого ключа, а сжатый блок должен быть полностью распакован для каждого столбца. Мы сделали индекс разреженным, потому что мы должны иметь возможность поддерживать триллионы строк на один сервер без существенных расходов памяти на индексацию. Кроме того, поскольку первичный ключ является разреженным, он не уникален: он не может проверить наличие ключа в таблице во время INSERT. Вы можете иметь множество строк с одним и тем же ключом в таблице. При выполнении `INSERT` для группы данных в `MergeTree`, элементы группы сортируются по первичному ключу и образует новую “часть”. Фоновые потоки периодически выбирают некоторые части и объединяют их в одну отсортированную часть, чтобы сохранить относительно небольшое количество частей. Вот почему он называется `MergeTree`. Конечно, объединение приводит к повышению интенсивности записи. Все части иммутабельные: они только создаются и удаляются, но не изменяются. Когда выполняется `SELECT`, он содержит снимок таблицы (набор частей). После объединения старые части также сохраняются в течение некоторого времени, чтобы упростить восстановление после сбоя, поэтому, если мы видим, что какая-то объединенная часть, вероятно, повреждена, мы можем заменить ее исходными частями. -`MergeTree` не является деревом LSM (Log-structured merge-tree — журнально-структурированное дерево со слиянием), потому что оно не содержит «memtable» и «log»: вставленные данные записываются непосредственно в файловую систему. Это делает его пригодным только для вставки данных в пакетах, а не по отдельным строкам и не очень часто - примерно раз в секунду это нормально, а тысячу раз в секунду - нет. Мы сделали это для простоты и потому, что мы уже вставляем данные в пакеты в наших приложениях. +`MergeTree` не является LSM (Log-structured merge-tree — журнально-структурированным деревом со слиянием), потому что оно не содержит «memtable» и «log»: вставленные данные записываются непосредственно в файловую систему. Это делает его пригодным только для вставки данных в пакетах, а не по отдельным строкам и не очень часто — примерно раз в секунду это нормально, а тысячу раз в секунду - нет. Мы сделали это для простоты и потому, что мы уже вставляем данные в пакеты в наших приложениях. > Таблицы `MergeTree` могут иметь только один (первичный) индекс: вторичных индексов нет. 
Было бы неплохо разрешить несколько физических представлениям в одной логической таблице, например, хранить данные в более чем одном физическом порядке или даже разрешить представления с предварительно агрегированными данными вместе с исходными данными. @@ -189,7 +189,7 @@ ClickHouse имеет сильную типизацию, поэтому нет Репликация реализована в движке таблицы `ReplicatedMergeTree`. Путь в `ZooKeeper` указывается в качестве параметра движка. Все таблицы с одинаковым путем в `ZooKeeper` становятся репликами друг друга: они синхронизируют свои данные и поддерживают согласованность. Реплики можно добавлять и удалять динамически, просто создавая или удаляя таблицу. -Репликация использует асинхронную multi-master схему. Вы можете вставить данные в любую реплику, которая имеет открытую сессию в `ZooKeeper`, и данные реплицируются на все другие реплики асинхронно. Поскольку ClickHouse не поддерживает UPDATE, репликация исключает конфликты (conflict-free replication). Поскольку подтверждение вставок кворумом не реализовано, только что вставленные данные могут быть потеряны в случае сбоя одного узла. +Репликация использует асинхронную multi-master-схему. Вы можете вставить данные в любую реплику, которая имеет открытую сессию в `ZooKeeper`, и данные реплицируются на все другие реплики асинхронно. Поскольку ClickHouse не поддерживает UPDATE, репликация исключает конфликты (conflict-free replication). Поскольку подтверждение вставок кворумом не реализовано, только что вставленные данные могут быть потеряны в случае сбоя одного узла. Метаданные для репликации хранятся в `ZooKeeper`. Существует журнал репликации, в котором перечислены действия, которые необходимо выполнить. Среди этих действий: получить часть (get the part); объединить части (merge parts); удалить партицию (drop a partition) и так далее. Каждая реплика копирует журнал репликации в свою очередь, а затем выполняет действия из очереди. Например, при вставке в журнале создается действие «получить часть» (get the part), и каждая реплика загружает эту часть. Слияния координируются между репликами, чтобы получить идентичные до байта результаты. Все части объединяются одинаково на всех репликах. Одна из реплик-лидеров инициирует новое слияние кусков первой и записывает действия «слияния частей» в журнал. Несколько реплик (или все) могут быть лидерами одновременно. Реплике можно запретить быть лидером с помощью `merge_tree` настройки `replicated_can_become_leader`. @@ -198,7 +198,7 @@ ClickHouse имеет сильную типизацию, поэтому нет Кроме того, каждая реплика сохраняет свое состояние в `ZooKeeper` в виде набора частей и его контрольных сумм. Когда состояние в локальной файловой системе расходится с эталонным состоянием в `ZooKeeper`, реплика восстанавливает свою согласованность путем загрузки отсутствующих и поврежденных частей из других реплик. Когда в локальной файловой системе есть неожиданные или испорченные данные, ClickHouse не удаляет их, а перемещает в отдельный каталог и забывает об этом. :::note Примечание -Кластер ClickHouse состоит из независимых шардов, а каждый шард состоит из реплик. Кластер **не является эластичным** (not elastic), поэтому после добавления нового шарда данные не будут автоматически распределены между ними. Вместо этого нужно изменить настройки, чтобы выровнять нагрузку на кластер. Эта реализация дает вам больший контроль, и вполне приемлема для относительно небольших кластеров, таких как десятки узлов. 
Но для кластеров с сотнями узлов, которые мы используем в эксплуатации, такой подход становится существенным недостатком. Движки таблиц, которые охватывают весь кластер с динамически реплицируемыми областями, которые могут быть автоматически разделены и сбалансированы между кластерами, еще предстоит реализовать. +Кластер ClickHouse состоит из независимых сегментов (shards), а каждый сегмент состоит из реплик. Кластер **не является эластичным** (not elastic), поэтому после добавления нового сегмента данные не будут автоматически распределены между ними. Вместо этого нужно изменить настройки, чтобы выровнять нагрузку на кластер. Эта реализация дает вам больший контроль, и вполне приемлема для относительно небольших кластеров, таких как десятки узлов. Но для кластеров с сотнями узлов, которые мы используем в эксплуатации, такой подход становится существенным недостатком. Движки таблиц, которые охватывают весь кластер с динамически реплицируемыми областями, которые могут быть автоматически разделены и сбалансированы между кластерами, еще предстоит реализовать. ::: {## [Original article](https://clickhouse.com/docs/ru/development/architecture/) ##} diff --git a/docs/ru/index.md b/docs/ru/index.md index 27748fe7051..78bb382753b 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -6,7 +6,7 @@ sidebar_label: "Что такое ClickHouse" # Что такое ClickHouse {#what-is-clickhouse} -ClickHouse - столбцовая система управления базами данных (СУБД) для онлайн обработки аналитических запросов (OLAP). +ClickHouse — столбцовая система управления базами данных (СУБД) для онлайн-обработки аналитических запросов (OLAP). В обычной, «строковой» СУБД, данные хранятся в таком порядке: @@ -19,10 +19,10 @@ ClickHouse - столбцовая система управления базам То есть, значения, относящиеся к одной строке, физически хранятся рядом. -Примеры строковых СУБД: MySQL, Postgres, MS SQL Server. +Примеры строковых СУБД: MySQL, PostgreSQL, MS SQL Server. {: .grey } -В столбцовых СУБД, данные хранятся в таком порядке: +В столбцовых СУБД данные хранятся в таком порядке: | Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| @@ -33,37 +33,37 @@ ClickHouse - столбцовая система управления базам | EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | В примерах изображён только порядок расположения данных. -То есть, значения из разных столбцов хранятся отдельно, а данные одного столбца - вместе. +То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. Примеры столбцовых СУБД: Vertica, Paraccel (Actian Matrix, Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise, Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, kdb+. {: .grey } Разный порядок хранения данных лучше подходит для разных сценариев работы. -Сценарий работы с данными - это то, какие производятся запросы, как часто и в каком соотношении; сколько читается данных на запросы каждого вида - строк, столбцов, байт; как соотносятся чтения и обновления данных; какой рабочий размер данных и насколько локально он используется; используются ли транзакции и с какой изолированностью; какие требования к дублированию данных и логической целостности; требования к задержкам на выполнение и пропускной способности запросов каждого вида и т. п. 
+Сценарий работы с данными — это то, какие производятся запросы, как часто и в каком соотношении; сколько читается данных на запросы каждого вида — строк, столбцов, байтов; как соотносятся чтения и обновления данных; какой рабочий размер данных и насколько локально он используется; используются ли транзакции и с какой изолированностью; какие требования к дублированию данных и логической целостности; требования к задержкам на выполнение и пропускной способности запросов каждого вида и т. п. Чем больше нагрузка на систему, тем более важной становится специализация под сценарий работы, и тем более конкретной становится эта специализация. Не существует системы, одинаково хорошо подходящей под существенно различные сценарии работы. Если система подходит под широкое множество сценариев работы, то при достаточно большой нагрузке, система будет справляться со всеми сценариями работы плохо, или справляться хорошо только с одним из сценариев работы. -## Ключевые особенности OLAP сценария работы {#kliuchevye-osobennosti-olap-stsenariia-raboty} +## Ключевые особенности OLAP-сценария работы {#kliuchevye-osobennosti-olap-stsenariia-raboty} -- подавляющее большинство запросов - на чтение; +- подавляющее большинство запросов — на чтение; - данные обновляются достаточно большими пачками (\> 1000 строк), а не по одной строке, или не обновляются вообще; - данные добавляются в БД, но не изменяются; -- при чтении, вынимается достаточно большое количество строк из БД, но только небольшое подмножество столбцов; -- таблицы являются «широкими», то есть, содержат большое количество столбцов; +- при чтении «вынимается» достаточно большое количество строк из БД, но только небольшое подмножество столбцов; +- таблицы являются «широкими», то есть содержат большое количество столбцов; - запросы идут сравнительно редко (обычно не более сотни в секунду на сервер); - при выполнении простых запросов, допустимы задержки в районе 50 мс; -- значения в столбцах достаточно мелкие - числа и небольшие строки (пример - 60 байт на URL); -- требуется высокая пропускная способность при обработке одного запроса (до миллиардов строк в секунду на один сервер); +- значения в столбцах достаточно мелкие — числа и небольшие строки (например, 60 байт на URL); +- требуется высокая пропускная способность при обработке одного запроса (до миллиардов строк в секунду на один узел); - транзакции отсутствуют; - низкие требования к консистентности данных; - в запросе одна большая таблица, все таблицы кроме одной маленькие; -- результат выполнения запроса существенно меньше исходных данных - то есть, данные фильтруются или агрегируются; результат выполнения помещается в оперативку на одном сервере. +- результат выполнения запроса существенно меньше исходных данных — то есть данные фильтруются или агрегируются; результат выполнения помещается в оперативную память одного узла. -Легко видеть, что OLAP сценарий работы существенно отличается от других распространённых сценариев работы (например, OLTP или Key-Value сценариев работы). Таким образом, не имеет никакого смысла пытаться использовать OLTP или Key-Value БД для обработки аналитических запросов, если вы хотите получить приличную производительность («выше плинтуса»). Например, если вы попытаетесь использовать для аналитики MongoDB или Redis - вы получите анекдотически низкую производительность по сравнению с OLAP-СУБД. +Легко видеть, что OLAP-сценарий работы существенно отличается от других распространённых сценариев работы (например, OLTP или Key-Value сценариев работы). 
Таким образом, не имеет никакого смысла пытаться использовать OLTP-системы или системы класса «ключ — значение» для обработки аналитических запросов, если вы хотите получить приличную производительность («выше плинтуса»). Например, если вы попытаетесь использовать для аналитики MongoDB или Redis — вы получите анекдотически низкую производительность по сравнению с OLAP-СУБД. -## Причины, по которым столбцовые СУБД лучше подходят для OLAP сценария {#prichiny-po-kotorym-stolbtsovye-subd-luchshe-podkhodiat-dlia-olap-stsenariia} +## Причины, по которым столбцовые СУБД лучше подходят для OLAP-сценария {#prichiny-po-kotorym-stolbtsovye-subd-luchshe-podkhodiat-dlia-olap-stsenariia} -Столбцовые СУБД лучше (от 100 раз по скорости обработки большинства запросов) подходят для OLAP сценария работы. Причины в деталях будут разъяснены ниже, а сам факт проще продемонстрировать визуально: +Столбцовые СУБД лучше (от 100 раз по скорости обработки большинства запросов) подходят для OLAP-сценария работы. Причины в деталях будут разъяснены ниже, а сам факт проще продемонстрировать визуально: **Строковые СУБД** @@ -94,6 +94,6 @@ ClickHouse - столбцовая система управления базам 2. Кодогенерация. Для запроса генерируется код, в котором подставлены все косвенные вызовы. -В «обычных» БД этого не делается, так как не имеет смысла при выполнении простых запросов. Хотя есть исключения. Например, в MemSQL кодогенерация используется для уменьшения latency при выполнении SQL запросов. Для сравнения, в аналитических СУБД требуется оптимизация throughput, а не latency. +В «обычных» СУБД этого не делается, так как не имеет смысла при выполнении простых запросов. Хотя есть исключения. Например, в MemSQL кодогенерация используется для уменьшения времени отклика при выполнении SQL-запросов. Для сравнения: в аналитических СУБД требуется оптимизация по пропускной способности (throughput, ГБ/с), а не времени отклика (latency, с). -Стоит заметить, что для эффективности по CPU требуется, чтобы язык запросов был декларативным (SQL, MDX) или хотя бы векторным (J, K). То есть, чтобы запрос содержал циклы только в неявном виде, открывая возможности для оптимизации. +Стоит заметить, что для эффективности по CPU требуется, чтобы язык запросов был декларативным (SQL, MDX) или хотя бы векторным (J, K). То есть необходимо, чтобы запрос содержал циклы только в неявном виде, открывая возможности для оптимизации. diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md index dc44ca8e03f..dafaf055980 100644 --- a/docs/ru/introduction/distinctive-features.md +++ b/docs/ru/introduction/distinctive-features.md @@ -8,11 +8,11 @@ sidebar_label: "Отличительные возможности ClickHouse" ## По-настоящему столбцовая СУБД {#po-nastoiashchemu-stolbtsovaia-subd} -В по-настоящему столбцовой СУБД рядом со значениями не хранится никаких лишних данных. Например, должны поддерживаться значения постоянной длины, чтобы не хранить рядом со значениями типа «число» их длины. Для примера, миллиард значений типа UInt8 должен действительно занимать в несжатом виде около 1GB, иначе это сильно ударит по эффективности использования CPU. Очень важно хранить данные компактно (без «мусора») в том числе в несжатом виде, так как скорость разжатия (использование CPU) зависит, в основном, от объёма несжатых данных. +В по-настоящему столбцовой СУБД рядом со значениями не хранится никаких лишних данных. Например, должны поддерживаться значения постоянной длины, чтобы не хранить рядом со значениями типа «число» их длины. 
Для примера, миллиард значений типа UInt8 должен действительно занимать в несжатом виде около 1 ГБ, иначе это сильно ударит по эффективности использования CPU. Очень важно хранить данные компактно (без «мусора») в том числе в несжатом виде, так как скорость разжатия (использование CPU) зависит, в основном, от объёма несжатых данных. Этот пункт пришлось выделить, так как существуют системы, которые могут хранить значения отдельных столбцов по отдельности, но не могут эффективно выполнять аналитические запросы в силу оптимизации под другой сценарий работы. Примеры: HBase, BigTable, Cassandra, HyperTable. В этих системах вы получите пропускную способность в районе сотен тысяч строк в секунду, но не сотен миллионов строк в секунду. -Также стоит заметить, что ClickHouse является системой управления базами данных, а не одной базой данных. То есть, ClickHouse позволяет создавать таблицы и базы данных в runtime, загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. +Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базой данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. ## Сжатие данных {#szhatie-dannykh} @@ -20,7 +20,7 @@ sidebar_label: "Отличительные возможности ClickHouse" ## Хранение данных на диске {#khranenie-dannykh-na-diske} -Многие столбцовые СУБД (SAP HANA, Google PowerDrill) могут работать только в оперативной памяти. Такой подход стимулирует выделять больший бюджет на оборудование, чем фактически требуется для анализа в реальном времени. ClickHouse спроектирован для работы на обычных жестких дисках, что обеспечивает низкую стоимость хранения на гигабайт данных, но SSD и дополнительная оперативная память тоже полноценно используются, если доступны. +Многие столбцовые СУБД (SAP HANA, Google PowerDrill) могут работать только в оперативной памяти. Такой подход стимулирует выделять больший бюджет на оборудование, чем фактически требуется для анализа в реальном времени. ClickHouse спроектирован для работы на обычных жестких дисках, что обеспечивает низкую стоимость хранения на гигабайт данных. При этом твердотельные накопители (SSD) и дополнительная оперативная память тоже полноценно используются, если доступны. ## Параллельная обработка запроса на многих процессорных ядрах {#parallelnaia-obrabotka-zaprosa-na-mnogikh-protsessornykh-iadrakh} @@ -29,11 +29,11 @@ sidebar_label: "Отличительные возможности ClickHouse" ## Распределённая обработка запроса на многих серверах {#raspredelionnaia-obrabotka-zaprosa-na-mnogikh-serverakh} Почти все перечисленные ранее столбцовые СУБД не поддерживают распределённую обработку запроса. -В ClickHouse данные могут быть расположены на разных шардах. Каждый шард может представлять собой группу реплик, которые используются для отказоустойчивости. Запрос будет выполнен на всех шардах параллельно. Это делается прозрачно для пользователя. +В ClickHouse данные могут быть расположены на разных сегментах (shards). Каждый сегмент может представлять собой группу реплик, которые используются для отказоустойчивости. Запрос будет выполнен на всех сегментах параллельно. Это делается прозрачно для пользователя. ## Поддержка SQL {#sql-support} -ClickHouse поддерживает [декларативный язык запросов на основе SQL](../sql-reference/index.md) и во [многих случаях](../sql-reference/ansi.mdx) совпадающий с SQL стандартом. 
+ClickHouse поддерживает [декларативный язык запросов на основе SQL](../sql-reference/index.md) и во [многих случаях](../sql-reference/ansi.mdx) совпадающий с SQL-стандартом. Поддерживаются [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), подзапросы в секциях [FROM](../sql-reference/statements/select/from.md), [IN](../sql-reference/operators/in.md), [JOIN](../sql-reference/statements/select/join.md), [функции window](../sql-reference/window-functions/index.mdx), а также скалярные подзапросы. @@ -41,17 +41,17 @@ ClickHouse поддерживает [декларативный язык зап ## Векторный движок {#vektornyi-dvizhok} -Данные не только хранятся по столбцам, но и обрабатываются по векторам - кусочкам столбцов. За счёт этого достигается высокая эффективность по CPU. +Данные не только хранятся по столбцам, но и обрабатываются по векторам — фрагментам столбцов. За счёт этого достигается высокая эффективность по CPU. ## Обновление данных в реальном времени {#obnovlenie-dannykh-v-realnom-vremeni} -ClickHouse поддерживает таблицы с первичным ключом. Для того, чтобы можно было быстро выполнять запросы по диапазону первичного ключа, данные инкрементально сортируются с помощью merge дерева. За счёт этого, поддерживается постоянное добавление данных в таблицу. Блокировки при добавлении данных отсутствуют. +ClickHouse поддерживает таблицы с первичным ключом. Для того, чтобы можно было быстро выполнять запросы по диапазону первичного ключа, данные инкрементально сортируются с помощью дерева со слиянием (merge tree). За счёт этого поддерживается постоянное добавление данных в таблицу. Блокировки при добавлении данных отсутствуют. ## Наличие индекса {#nalichie-indeksa} -Физическая сортировка данных по первичному ключу позволяет получать данные для конкретных его значений или их диапазонов с низкими задержками - менее десятков миллисекунд. +Физическая сортировка данных по первичному ключу позволяет получать данные для конкретных его значений или их диапазонов с низкими задержками — менее десятков миллисекунд. -## Подходит для онлайн запросов {#podkhodit-dlia-onlain-zaprosov} +## Подходит для онлайн-запросов {#podkhodit-dlia-onlain-zaprosov} Низкие задержки позволяют не откладывать выполнение запроса и не подготавливать ответ заранее, а выполнять его именно в момент загрузки страницы пользовательского интерфейса. То есть, в режиме онлайн. @@ -60,12 +60,12 @@ ClickHouse поддерживает таблицы с первичным клю ClickHouse предоставляет различные способы разменять точность вычислений на производительность: 1. Система содержит агрегатные функции для приближённого вычисления количества различных значений, медианы и квантилей. -2. Поддерживается возможность выполнить запрос на основе части (выборки) данных и получить приближённый результат. При этом, с диска будет считано пропорционально меньше данных. +2. Поддерживается возможность выполнить запрос на основе части (выборки) данных и получить приближённый результат. При этом с диска будет считано пропорционально меньше данных. 3. Поддерживается возможность выполнить агрегацию не для всех ключей, а для ограниченного количества первых попавшихся ключей. При выполнении некоторых условий на распределение ключей в данных, это позволяет получить достаточно точный результат с использованием меньшего количества ресурсов. ## Репликация данных и поддержка целостности {#replikatsiia-dannykh-i-podderzhka-tselostnosti} -Используется асинхронная multimaster репликация. 
После записи на любую доступную реплику, данные распространяются на все остальные реплики в фоне. Система поддерживает полную идентичность данных на разных репликах. Восстановление после большинства сбоев осуществляется автоматически, а в сложных случаях — полуавтоматически. При необходимости, можно [включить кворумную запись](../operations/settings/settings.md) данных. +Используется асинхронная multimaster-репликация. После записи на любую доступную реплику, данные распространяются на все остальные реплики в фоне. Система поддерживает полную идентичность данных на разных репликах. Восстановление после большинства сбоев осуществляется автоматически, а в сложных случаях — полуавтоматически. При необходимости, можно [включить кворумную запись](../operations/settings/settings.md) данных. Подробнее смотрите раздел [Репликация данных](../engines/table-engines/mergetree-family/replication.md). diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 543ea8f4449..ab9252dd62e 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -475,6 +475,27 @@ void FourLetterWordCommand::execute(const ASTKeeperQuery * query, KeeperClient * std::cout << client->executeFourLetterCommand(query->args[0].safeGet()) << "\n"; } +bool GetDirectChildrenNumberCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + + return true; +} + +void GetDirectChildrenNumberCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto path = client->getAbsolutePath(query->args[0].safeGet()); + + Coordination::Stat stat; + client->zookeeper->get(path, &stat); + + std::cout << stat.numChildren << "\n"; +} + bool GetAllChildrenNumberCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String path; diff --git a/programs/keeper-client/Commands.h b/programs/keeper-client/Commands.h index 45a7c85266d..c6dd731fb3b 100644 --- a/programs/keeper-client/Commands.h +++ b/programs/keeper-client/Commands.h @@ -238,6 +238,20 @@ class FourLetterWordCommand : public IKeeperClientCommand String getHelpMessage() const override { return "{} -- Executes four-letter-word command"; } }; +class GetDirectChildrenNumberCommand : public IKeeperClientCommand +{ + String getName() const override { return "get_direct_children_number"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] -- Get numbers of direct children nodes under a specific path"; + } +}; + class GetAllChildrenNumberCommand : public IKeeperClientCommand { String getName() const override { return "get_all_children_number"; } diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index f96975f5ab1..7ed4499efbd 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -207,6 +207,7 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) std::make_shared(), std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared(), }); diff --git a/programs/server/config.xml b/programs/server/config.xml index 1dd527805fd..a1e2907f6b6 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -926,6 
+926,15 @@ --> + + + 3600 diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 9439a17dabd..981a7aafc6f 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -272,13 +272,13 @@ void ExternalAuthenticators::resetImpl() void ExternalAuthenticators::reset() { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); resetImpl(); } void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); resetImpl(); Poco::Util::AbstractConfiguration::Keys all_keys; @@ -390,7 +390,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B UInt128 params_hash = 0; { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); // Retrieve the server parameters. const auto pit = ldap_client_params_blueprint.find(server); @@ -460,7 +460,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B // Update the cache, but only if this is the latest check and the server is still configured in a compatible way. if (result) { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); // If the server was removed from the config while we were checking the password, we discard the current result. const auto pit = ldap_client_params_blueprint.find(server); @@ -507,7 +507,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B bool ExternalAuthenticators::checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); if (!kerberos_params.has_value()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Kerberos is not enabled"); @@ -526,7 +526,7 @@ bool ExternalAuthenticators::checkKerberosCredentials(const String & realm, cons GSSAcceptorContext::Params ExternalAuthenticators::getKerberosParams() const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); if (!kerberos_params.has_value()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Kerberos is not enabled"); @@ -536,7 +536,7 @@ GSSAcceptorContext::Params ExternalAuthenticators::getKerberosParams() const HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const String& server) const { - std::scoped_lock lock{mutex}; + std::lock_guard lock{mutex}; const auto it = http_auth_servers.find(server); if (it == http_auth_servers.end()) diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp index 7170028e4e6..02946f0d74d 100644 --- a/src/Access/GSSAcceptor.cpp +++ b/src/Access/GSSAcceptor.cpp @@ -90,7 +90,7 @@ String bufferToString(const gss_buffer_desc & buf) String extractSpecificStatusMessages(OM_uint32 status_code, int status_type, const gss_OID & mech_type) { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); String messages; OM_uint32 message_context = 0; @@ -135,7 +135,7 @@ String extractSpecificStatusMessages(OM_uint32 status_code, int status_type, con String extractStatusMessages(OM_uint32 major_status_code, OM_uint32 minor_status_code, const gss_OID & mech_type) { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); const auto gss_messages = extractSpecificStatusMessages(major_status_code, GSS_C_GSS_CODE, mech_type); const auto mech_messages = extractSpecificStatusMessages(minor_status_code, GSS_C_MECH_CODE, mech_type); @@ -158,7 +158,7 @@ String extractStatusMessages(OM_uint32 
major_status_code, OM_uint32 minor_status std::pair extractNameAndRealm(const gss_name_t & name) { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); gss_buffer_desc name_buf; name_buf.length = 0; @@ -186,7 +186,7 @@ std::pair extractNameAndRealm(const gss_name_t & name) bool equalMechanisms(const String & left_str, const gss_OID & right_oid) { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); gss_buffer_desc left_buf; left_buf.length = left_str.size(); @@ -232,7 +232,7 @@ void GSSAcceptorContext::reset() void GSSAcceptorContext::resetHandles() noexcept { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); if (acceptor_credentials_handle != GSS_C_NO_CREDENTIAL) { @@ -258,7 +258,7 @@ void GSSAcceptorContext::resetHandles() noexcept void GSSAcceptorContext::initHandles() { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); resetHandles(); @@ -330,7 +330,7 @@ void GSSAcceptorContext::initHandles() String GSSAcceptorContext::processToken(const String & input_token, Poco::Logger * log) { - std::scoped_lock lock(gss_global_mutex); + std::lock_guard lock(gss_global_mutex); String output_token; diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 9be28b763e0..c271cd39845 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -36,14 +36,14 @@ LDAPAccessStorage::LDAPAccessStorage(const String & storage_name_, AccessControl String LDAPAccessStorage::getLDAPServerName() const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); return ldap_server_name; } void LDAPAccessStorage::setConfiguration(const Poco::Util::AbstractConfiguration & config, const String & prefix) { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); // TODO: switch to passing config as a ConfigurationView and remove this extra prefix once a version of Poco with proper implementation is available. const String prefix_str = (prefix.empty() ? 
"" : prefix + "."); @@ -102,7 +102,7 @@ void LDAPAccessStorage::setConfiguration(const Poco::Util::AbstractConfiguration void LDAPAccessStorage::processRoleChange(const UUID & id, const AccessEntityPtr & entity) { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); const auto role = typeid_cast>(entity); const auto it = granted_role_names.find(id); @@ -371,7 +371,7 @@ const char * LDAPAccessStorage::getStorageType() const String LDAPAccessStorage::getStorageParamsJSON() const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); Poco::JSON::Object params_json; params_json.set("server", ldap_server_name); @@ -417,35 +417,35 @@ String LDAPAccessStorage::getStorageParamsJSON() const std::optional LDAPAccessStorage::findImpl(AccessEntityType type, const String & name) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); return memory_storage.find(type, name); } std::vector LDAPAccessStorage::findAllImpl(AccessEntityType type) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); return memory_storage.findAll(type); } bool LDAPAccessStorage::exists(const UUID & id) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); return memory_storage.exists(id); } AccessEntityPtr LDAPAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); return memory_storage.read(id, throw_if_not_exists); } std::optional> LDAPAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); return memory_storage.readNameWithType(id, throw_if_not_exists); } @@ -458,7 +458,7 @@ std::optional LDAPAccessStorage::authenticateImpl( bool /* allow_no_password */, bool /* allow_plaintext_password */) const { - std::scoped_lock lock(mutex); + std::lock_guard lock(mutex); auto id = memory_storage.find(credentials.getUserName()); UserPtr user = id ? 
memory_storage.read(*id) : nullptr; diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 023f305f5cd..7926298061d 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -172,7 +172,7 @@ namespace void LDAPClient::handleError(int result_code, String text) { - std::scoped_lock lock(ldap_global_mutex); + std::lock_guard lock(ldap_global_mutex); if (result_code != LDAP_SUCCESS) { @@ -212,7 +212,7 @@ void LDAPClient::handleError(int result_code, String text) bool LDAPClient::openConnection() { - std::scoped_lock lock(ldap_global_mutex); + std::lock_guard lock(ldap_global_mutex); closeConnection(); @@ -390,7 +390,7 @@ bool LDAPClient::openConnection() void LDAPClient::closeConnection() noexcept { - std::scoped_lock lock(ldap_global_mutex); + std::lock_guard lock(ldap_global_mutex); if (!handle) return; @@ -404,7 +404,7 @@ void LDAPClient::closeConnection() noexcept LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) { - std::scoped_lock lock(ldap_global_mutex); + std::lock_guard lock(ldap_global_mutex); SearchResults result; diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 8f6652223cc..f7b3524d1b9 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -8,9 +8,6 @@ #include -static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF; - - namespace DB { @@ -134,9 +131,12 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const threshold = applyVisitor(FieldVisitorConvertToNumber(), params[0]); - if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || threshold * load_factor > TOP_K_MAX_SIZE) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE)); + if (threshold > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold * load_factor > DB::TOP_K_MAX_SIZE) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Too large parameter(s) for aggregate function '{}' (maximum is {})", + name, + toString(DB::TOP_K_MAX_SIZE)); if (threshold == 0) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name); diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 89985c0ea6b..89c49b24530 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -20,6 +20,12 @@ namespace DB { struct Settings; +static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF; + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} template struct AggregateFunctionTopKData @@ -163,11 +169,18 @@ public: { auto & set = this->data(place).value; set.clear(); - set.resize(reserved); // Specialized here because there's no deserialiser for StringRef size_t size = 0; readVarUInt(size, buf); + if (unlikely(size > TOP_K_MAX_SIZE)) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Too large size ({}) for aggregate function '{}' state (maximum is {})", + size, + getName(), + TOP_K_MAX_SIZE); + set.resize(size); for (size_t i = 0; i < size; ++i) { auto ref = readStringBinaryInto(*arena, buf); diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 6804e85c4cf..5e8f2ab8e61 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -573,4 +573,36 @@ void replaceColumns(QueryTreeNodePtr & node, 
visitor.visit(node); } +namespace +{ + +class CollectIdentifiersFullNamesVisitor : public ConstInDepthQueryTreeVisitor +{ +public: + explicit CollectIdentifiersFullNamesVisitor(NameSet & used_identifiers_) + : used_identifiers(used_identifiers_) { } + + static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr &) { return true; } + + void visitImpl(const QueryTreeNodePtr & node) + { + auto * column_node = node->as(); + if (!column_node) + return; + + used_identifiers.insert(column_node->getIdentifier().getFullName()); + } + + NameSet & used_identifiers; +}; + +} + +NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node) +{ + NameSet out; + CollectIdentifiersFullNamesVisitor visitor(out); + visitor.visit(node); + return out; +} } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index ea36e17bf11..6756c4d313c 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -83,4 +83,8 @@ void replaceColumns(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression_node, const std::unordered_map & column_name_to_node); + +/// Just collect all identifiers from query tree +NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); + } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d13c9cbe9bc..d2985665db3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -104,6 +104,7 @@ if (TARGET ch_contrib::nats_io) endif() add_headers_and_sources(dbms Storages/DataLakes) +add_headers_and_sources(dbms Storages/DataLakes/Iceberg) add_headers_and_sources(dbms Common/NamedCollections) if (TARGET ch_contrib::amqp_cpp) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a350654cdda..9c7bfe5974f 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -449,20 +449,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (!block) return; - if (block.rows() == 0 && partial_result_mode == PartialResultMode::Active) - { - partial_result_mode = PartialResultMode::Inactive; - if (is_interactive) - { - progress_indication.clearProgressOutput(*tty_buf); - std::cout << "Full result:" << std::endl; - progress_indication.writeProgress(*tty_buf); - } - } - - if (partial_result_mode == PartialResultMode::Inactive) - processed_rows += block.rows(); - + processed_rows += block.rows(); /// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset. initOutputFormat(block, parsed_query); @@ -472,20 +459,13 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query) if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) return; - if (!is_interactive && partial_result_mode == PartialResultMode::Active) - return; - /// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker. 
if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout)) progress_indication.clearProgressOutput(*tty_buf); try { - if (partial_result_mode == PartialResultMode::Active) - output_format->writePartialResult(materializeBlock(block)); - else - output_format->write(materializeBlock(block)); - + output_format->write(materializeBlock(block)); written_first_block = true; } catch (const Exception &) @@ -549,9 +529,6 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info) void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query) try { - if (partial_result_mode == PartialResultMode::NotInit) - partial_result_mode = PartialResultMode::Active; - if (!output_format) { /// Ignore all results when fuzzing as they can be huge. @@ -994,14 +971,6 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa const auto & settings = global_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; - bool has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0; - - if (has_partial_result_setting) - { - partial_result_mode = PartialResultMode::NotInit; - if (is_interactive) - std::cout << "Partial result:" << std::endl; - } int retries_left = 10; while (retries_left) @@ -1828,7 +1797,6 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } processed_rows = 0; - partial_result_mode = PartialResultMode::Inactive; written_first_block = false; progress_indication.resetProgress(); profile_events.watch.restart(); @@ -1950,9 +1918,10 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - std::cout << std::endl - << processed_rows << " row" << (processed_rows == 1 ? "" : "s") - << " in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. "; + std::cout << std::endl; + if (!server_exception || processed_rows != 0) + std::cout << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. "; + std::cout << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. "; progress_indication.writeFinalProgress(); std::cout << std::endl << std::endl; } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 604c8cf4d5c..2156aae7181 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -275,21 +275,6 @@ protected: size_t processed_rows = 0; /// How many rows have been read or written. bool print_num_processed_rows = false; /// Whether to print the number of processed rows at - enum class PartialResultMode: UInt8 - { - /// Query doesn't show partial result before the first block with 0 rows. - /// The first block with 0 rows initializes the output table format using its header. - NotInit, - - /// Query shows partial result after the first and before the second block with 0 rows. - /// The second block with 0 rows indicates that that receiving blocks with partial result has been completed and next blocks will be with the full result. - Active, - - /// Query doesn't show partial result at all. - Inactive, - }; - PartialResultMode partial_result_mode = PartialResultMode::Inactive; - bool print_stack_trace = false; /// The last exception that was received from the server. Is used for the /// return code in batch mode. 
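The `AggregateFunctionTopK.h` hunk earlier in this patch replaces an unconditional `resize` with a bound check on the element count read from the stream (the limit there is `TOP_K_MAX_SIZE`). As a generic illustration of that defensive-deserialization pattern, here is a small stand-alone sketch; the names, the limit, and the use of standard-library streams instead of the project's read helpers are all assumptions made purely for the example.

```cpp
// Illustrative sketch (hypothetical names, not the actual ClickHouse helpers):
// when deserializing state, validate the element count read from the stream
// against a hard limit *before* allocating, so a corrupted or hostile stream
// cannot force a huge resize().
#include <cstdint>
#include <istream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

static constexpr uint64_t MAX_STATE_SIZE = 0xFFFFFF;  /// assumed cap, playing the role of TOP_K_MAX_SIZE

uint64_t readSize(std::istream & in)
{
    uint64_t size = 0;
    in.read(reinterpret_cast<char *>(&size), sizeof(size));  /// native byte order, just to keep the sketch short
    if (!in)
        throw std::runtime_error("Cannot read size prefix");
    return size;
}

std::vector<uint64_t> deserializeState(std::istream & in)
{
    const uint64_t size = readSize(in);

    /// The important part: check the untrusted count before resize().
    if (size > MAX_STATE_SIZE)
        throw std::runtime_error("Too large state size: " + std::to_string(size));

    std::vector<uint64_t> values(size);
    for (uint64_t & v : values)
    {
        in.read(reinterpret_cast<char *>(&v), sizeof(v));
        if (!in)
            throw std::runtime_error("Unexpected end of stream");
    }
    return values;
}

int main()
{
    /// Round-trip a tiny state to show the happy path; a corrupted size prefix would throw instead.
    std::stringstream buf;
    const std::vector<uint64_t> original = {10, 20, 30};
    const uint64_t n = original.size();
    buf.write(reinterpret_cast<const char *>(&n), sizeof(n));
    for (uint64_t v : original)
        buf.write(reinterpret_cast<const char *>(&v), sizeof(v));

    const auto restored = deserializeState(buf);
    return restored == original ? 0 : 1;
}
```

The same reasoning applies to any state that is reconstructed from data the server did not produce itself: allocate only after the count has been checked against an explicit ceiling.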
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 0d82818a431..012ae03bab2 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include @@ -159,6 +161,59 @@ void ColumnDecimal::getPermutation(IColumn::PermutationSortDirection directio return data[lhs] > data[rhs]; }; + size_t data_size = data.size(); + res.resize(data_size); + + if (limit >= data_size) + limit = 0; + + for (size_t i = 0; i < data_size; ++i) + res[i] = i; + + if constexpr (is_arithmetic_v && !is_big_int_v) + { + if (!limit) + { + /// A case for radix sort + /// LSD RadixSort is stable + + bool reverse = direction == IColumn::PermutationSortDirection::Descending; + bool ascending = direction == IColumn::PermutationSortDirection::Ascending; + bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable; + + /// TODO: LSD RadixSort is currently not stable if direction is descending + bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable; + + /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters. + if (data_size >= 256 && data_size <= std::numeric_limits::max() && use_radix_sort) + { + for (size_t i = 0; i < data_size; ++i) + res[i] = i; + + bool try_sort = false; + + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + try_sort = trySort(res.begin(), res.end(), comparator_ascending); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + try_sort = trySort(res.begin(), res.end(), comparator_ascending_stable); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) + try_sort = trySort(res.begin(), res.end(), comparator_descending); + else + try_sort = trySort(res.begin(), res.end(), comparator_descending_stable); + + if (try_sort) + return; + + PaddedPODArray> pairs(data_size); + for (UInt32 i = 0; i < static_cast(data_size); ++i) + pairs[i] = {data[i].value, i}; + + RadixSort>::executeLSD(pairs.data(), data_size, reverse, res.data()); + return; + } + } + } + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) this->getPermutationImpl(limit, res, comparator_ascending, DefaultSort(), DefaultPartialSort()); else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) @@ -191,7 +246,37 @@ void ColumnDecimal::updatePermutation(IColumn::PermutationSortDirection direc return data[lhs] < data[rhs]; }; auto equals_comparator = [this](size_t lhs, size_t rhs) { return data[lhs] == data[rhs]; }; - auto sort = [](auto begin, auto end, auto pred) { ::sort(begin, end, pred); }; + auto sort = [&](auto begin, auto end, auto pred) + { + bool reverse = direction == IColumn::PermutationSortDirection::Descending; + bool ascending = direction == IColumn::PermutationSortDirection::Ascending; + bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable; + + /// TODO: LSD RadixSort is currently not stable if direction is descending + bool use_radix_sort = (sort_is_stable && ascending) || !sort_is_stable; + size_t size = end - begin; + + if (size >= 256 && size <= std::numeric_limits::max() && use_radix_sort) 
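// Sketch of the trySort-then-fallback pattern used by the permutation code above, shown on a
// plain index array. trySort and ::sort come from base/sort.h; sortIndexesByValue is an
// illustrative name, not part of the patch.
#include <base/sort.h>
#include <vector>
#include <numeric>

void sortIndexesByValue(std::vector<size_t> & res, const std::vector<double> & data)
{
    res.resize(data.size());
    std::iota(res.begin(), res.end(), 0);

    auto less_by_value = [&](size_t lhs, size_t rhs) { return data[lhs] < data[rhs]; };

    /// Fast path: trySort handles common pre-sorted patterns and reports whether it succeeded.
    if (::trySort(res.begin(), res.end(), less_by_value))
        return;

    /// Slow path: a full comparison sort; the column code switches to LSD radix sort here instead.
    ::sort(res.begin(), res.end(), less_by_value);
}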
+ { + bool try_sort = trySort(begin, end, pred); + if (try_sort) + return; + + PaddedPODArray> pairs(size); + size_t index = 0; + + for (auto * it = begin; it != end; ++it) + { + pairs[index] = {data[*it].value, static_cast(*it)}; + ++index; + } + + RadixSort>::executeLSD(pairs.data(), size, reverse, begin); + return; + } + + ::sort(begin, end, pred); + }; auto partial_sort = [](auto begin, auto mid, auto end, auto pred) { ::partial_sort(begin, mid, end, pred); }; if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index e46384e4d03..37e62c76596 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -192,26 +193,6 @@ struct ColumnVector::equals bool operator()(size_t lhs, size_t rhs) const { return CompareHelper::equals(parent.data[lhs], parent.data[rhs], nan_direction_hint); } }; -namespace -{ - template - struct ValueWithIndex - { - T value; - UInt32 index; - }; - - template - struct RadixSortTraits : RadixSortNumTraits - { - using Element = ValueWithIndex; - using Result = size_t; - - static T & extractKey(Element & elem) { return elem.value; } - static size_t extractResult(Element & elem) { return elem.index; } - }; -} - #if USE_EMBEDDED_COMPILER template @@ -254,35 +235,25 @@ template void ColumnVector::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const { - size_t s = data.size(); - res.resize(s); + size_t data_size = data.size(); + res.resize(data_size); - if (s == 0) + if (data_size == 0) return; - if (limit >= s) + if (limit >= data_size) limit = 0; - if (limit) - { - for (size_t i = 0; i < s; ++i) - res[i] = i; + for (size_t i = 0; i < data_size; ++i) + res[i] = i; - if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) - ::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this, nan_direction_hint)); - else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) - ::partial_sort(res.begin(), res.begin() + limit, res.end(), less_stable(*this, nan_direction_hint)); - else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) - ::partial_sort(res.begin(), res.begin() + limit, res.end(), greater(*this, nan_direction_hint)); - else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable) - ::partial_sort(res.begin(), res.begin() + limit, res.end(), greater_stable(*this, nan_direction_hint)); - } - else + if constexpr (is_arithmetic_v && !is_big_int_v) { - /// A case for radix sort - /// LSD RadixSort is stable - if constexpr (is_arithmetic_v && !is_big_int_v) + if (!limit) { + /// A case for radix sort + /// LSD RadixSort is stable + bool reverse = direction == IColumn::PermutationSortDirection::Descending; bool ascending = direction == IColumn::PermutationSortDirection::Ascending; bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable; @@ -291,13 +262,27 @@ void ColumnVector::getPermutation(IColumn::PermutationSortDirection direction bool use_radix_sort = (sort_is_stable && ascending && !std::is_floating_point_v) || 
!sort_is_stable; /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters. - if (s >= 256 && s <= std::numeric_limits::max() && use_radix_sort) + if (data_size >= 256 && data_size <= std::numeric_limits::max() && use_radix_sort) { - PaddedPODArray> pairs(s); - for (UInt32 i = 0; i < static_cast(s); ++i) + bool try_sort = false; + + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + try_sort = trySort(res.begin(), res.end(), less(*this, nan_direction_hint)); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + try_sort = trySort(res.begin(), res.end(), less_stable(*this, nan_direction_hint)); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) + try_sort = trySort(res.begin(), res.end(), greater(*this, nan_direction_hint)); + else + try_sort = trySort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint)); + + if (try_sort) + return; + + PaddedPODArray> pairs(data_size); + for (UInt32 i = 0; i < static_cast(data_size); ++i) pairs[i] = {data[i], i}; - RadixSort>::executeLSD(pairs.data(), s, reverse, res.data()); + RadixSort>::executeLSD(pairs.data(), data_size, reverse, res.data()); /// Radix sort treats all NaNs to be greater than all numbers. /// If the user needs the opposite, we must move them accordingly. @@ -305,9 +290,9 @@ void ColumnVector::getPermutation(IColumn::PermutationSortDirection direction { size_t nans_to_move = 0; - for (size_t i = 0; i < s; ++i) + for (size_t i = 0; i < data_size; ++i) { - if (isNaN(data[res[reverse ? i : s - 1 - i]])) + if (isNaN(data[res[reverse ? i : data_size - 1 - i]])) ++nans_to_move; else break; @@ -315,38 +300,35 @@ void ColumnVector::getPermutation(IColumn::PermutationSortDirection direction if (nans_to_move) { - std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res)); + std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : data_size - nans_to_move), std::end(res)); } } + return; } } - - /// Default sorting algorithm. 
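// Sketch of the indirect radix-sort step used above, with the ValueWithIndex / RadixSortTraits
// helpers that this patch moves into Columns/RadixSortHelper.h (see below). The function name
// is illustrative; Common/RadixSort.h is assumed to provide RadixSort.
#include <Columns/RadixSortHelper.h>
#include <Common/RadixSort.h>
#include <Common/PODArray.h>

namespace DB
{

void radixSortPermutation(const PaddedPODArray<UInt64> & data, PaddedPODArray<size_t> & res, bool reverse)
{
    size_t data_size = data.size();
    res.resize(data_size);

    /// Pair every value with its row index so the sort produces a permutation.
    PaddedPODArray<ValueWithIndex<UInt64>> pairs(data_size);
    for (UInt32 i = 0; i < static_cast<UInt32>(data_size); ++i)
        pairs[i] = {data[i], i};

    /// LSD radix sort is stable; `reverse` flips the direction, as in the column code above.
    RadixSort<RadixSortTraits<UInt64>>::executeLSD(pairs.data(), data_size, reverse, res.data());
}

}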
- for (size_t i = 0; i < s; ++i) - res[i] = i; - - if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) - ::sort(res.begin(), res.end(), less(*this, nan_direction_hint)); - else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) - ::sort(res.begin(), res.end(), less_stable(*this, nan_direction_hint)); - else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) - ::sort(res.begin(), res.end(), greater(*this, nan_direction_hint)); - else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable) - ::sort(res.begin(), res.end(), greater_stable(*this, nan_direction_hint)); } + + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + this->getPermutationImpl(limit, res, less(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + this->getPermutationImpl(limit, res, less_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) + this->getPermutationImpl(limit, res, greater(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else + this->getPermutationImpl(limit, res, greater_stable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); } template void ColumnVector::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const { - bool reverse = direction == IColumn::PermutationSortDirection::Descending; - bool ascending = direction == IColumn::PermutationSortDirection::Ascending; - bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable; - auto sort = [&](auto begin, auto end, auto pred) { + bool reverse = direction == IColumn::PermutationSortDirection::Descending; + bool ascending = direction == IColumn::PermutationSortDirection::Ascending; + bool sort_is_stable = stability == IColumn::PermutationSortStability::Stable; + /// A case for radix sort if constexpr (is_arithmetic_v && !is_big_int_v) { @@ -357,6 +339,10 @@ void ColumnVector::updatePermutation(IColumn::PermutationSortDirection direct /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters. 
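// Usage sketch for the (unchanged) getPermutation contract that the code above implements:
// after the call, res holds row numbers such that reading the column in that order yields
// ascending values. ascendingOrder is an illustrative name.
#include <Columns/ColumnVector.h>

void ascendingOrder(const DB::ColumnVector<UInt64> & column, DB::IColumn::Permutation & res)
{
    column.getPermutation(
        DB::IColumn::PermutationSortDirection::Ascending,
        DB::IColumn::PermutationSortStability::Unstable,
        /* limit = */ 0,
        /* nan_direction_hint = */ 1,
        res);
}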
if (size >= 256 && size <= std::numeric_limits::max() && use_radix_sort) { + bool try_sort = trySort(begin, end, pred); + if (try_sort) + return; + PaddedPODArray> pairs(size); size_t index = 0; diff --git a/src/Columns/RadixSortHelper.h b/src/Columns/RadixSortHelper.h new file mode 100644 index 00000000000..e7d8ea6e535 --- /dev/null +++ b/src/Columns/RadixSortHelper.h @@ -0,0 +1,25 @@ +#pragma once + +#include + +namespace DB +{ + +template +struct ValueWithIndex +{ + T value; + UInt32 index; +}; + +template +struct RadixSortTraits : RadixSortNumTraits +{ + using Element = ValueWithIndex; + using Result = size_t; + + static T & extractKey(Element & elem) { return elem.value; } + static size_t extractResult(Element & elem) { return elem.index; } +}; + +} diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp index 876bc4e641c..049dee459a7 100644 --- a/src/Common/FileChecker.cpp +++ b/src/Common/FileChecker.cpp @@ -82,33 +82,32 @@ size_t FileChecker::getTotalSize() const } -CheckResults FileChecker::check() const +FileChecker::DataValidationTasksPtr FileChecker::getDataValidationTasks() { - if (map.empty()) + return std::make_unique(map); +} + +std::optional FileChecker::checkNextEntry(DataValidationTasksPtr & check_data_tasks) const +{ + String name; + size_t expected_size; + bool is_finished = check_data_tasks->next(name, expected_size); + if (is_finished) return {}; - CheckResults results; + String path = parentPath(files_info_path) + name; + bool exists = fileReallyExists(path); + auto real_size = exists ? getRealFileSize(path) : 0; /// No race condition assuming no one else is working with these files. - for (const auto & name_size : map) + if (real_size != expected_size) { - const String & name = name_size.first; - String path = parentPath(files_info_path) + name; - bool exists = fileReallyExists(path); - auto real_size = exists ? getRealFileSize(path) : 0; /// No race condition assuming no one else is working with these files. - - if (real_size != name_size.second) - { - String failure_message = exists - ? ("Size of " + path + " is wrong. Size is " + toString(real_size) + " but should be " + toString(name_size.second)) - : ("File " + path + " doesn't exist"); - results.emplace_back(name, false, failure_message); - break; - } - - results.emplace_back(name, true, ""); + String failure_message = exists + ? ("Size of " + path + " is wrong. Size is " + toString(real_size) + " but should be " + toString(expected_size)) + : ("File " + path + " doesn't exist"); + return CheckResult(name, false, failure_message); } - return results; + return CheckResult(name, true, ""); } void FileChecker::repair() diff --git a/src/Common/FileChecker.h b/src/Common/FileChecker.h index bb0383e4b56..41b151e51b0 100644 --- a/src/Common/FileChecker.h +++ b/src/Common/FileChecker.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -28,7 +29,11 @@ public: bool empty() const { return map.empty(); } /// Check the files whose parameters are specified in sizes.json - CheckResults check() const; + /// See comment in IStorage::checkDataNext + struct DataValidationTasks; + using DataValidationTasksPtr = std::unique_ptr; + DataValidationTasksPtr getDataValidationTasks(); + std::optional checkNextEntry(DataValidationTasksPtr & check_data_tasks) const; /// Truncate files that have excessive size to the expected size. /// Throw exception if the file size is less than expected. @@ -41,6 +46,36 @@ public: /// Returns total size of all files. 
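// Illustrative sketch of driving the new incremental FileChecker API above, which replaces the
// all-at-once check(). checkAllFiles is a hypothetical caller; the real consumers are the
// storages' checkDataNext() implementations.
#include <Common/FileChecker.h>
#include <Storages/CheckResults.h>

namespace DB
{

CheckResults checkAllFiles(FileChecker & file_checker)
{
    CheckResults results;
    auto tasks = file_checker.getDataValidationTasks();

    /// checkNextEntry() returns an empty optional once every file listed in sizes.json was checked.
    while (auto result = file_checker.checkNextEntry(tasks))
        results.push_back(std::move(*result));

    return results;
}

}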
size_t getTotalSize() const; + struct DataValidationTasks + { + DataValidationTasks(const std::map & map_) + : map(map_), it(map.begin()) + {} + + bool next(String & out_name, size_t & out_size) + { + std::lock_guard lock(mutex); + if (it == map.end()) + return true; + out_name = it->first; + out_size = it->second; + ++it; + return false; + } + + size_t size() const + { + std::lock_guard lock(mutex); + return std::distance(it, map.end()); + } + + const std::map & map; + + mutable std::mutex mutex; + using Iterator = std::map::const_iterator; + Iterator it; + }; + private: void load(); diff --git a/src/Common/SharedLockGuard.h b/src/Common/SharedLockGuard.h new file mode 100644 index 00000000000..93d2f42e907 --- /dev/null +++ b/src/Common/SharedLockGuard.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +/** SharedLockGuard provide RAII-style locking mechanism for acquiring shared ownership of the implementation + * of the SharedLockable concept (for example std::shared_mutex or ContextSharedMutex) supplied as the + * constructor argument. Think of it as std::lock_guard which locks shared. + * + * On construction it acquires shared ownership using `lock_shared` method. + * On destruction shared ownership is released using `unlock_shared` method. + */ +template +class TSA_SCOPED_LOCKABLE SharedLockGuard +{ +public: + explicit SharedLockGuard(Mutex & mutex_) TSA_ACQUIRE_SHARED(mutex_) : mutex(mutex_) { mutex_.lock_shared(); } + + ~SharedLockGuard() TSA_RELEASE() { mutex.unlock_shared(); } + +private: + Mutex & mutex; +}; + +} diff --git a/src/Common/SharedMutexHelper.h b/src/Common/SharedMutexHelper.h new file mode 100644 index 00000000000..8dddaab6c78 --- /dev/null +++ b/src/Common/SharedMutexHelper.h @@ -0,0 +1,112 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/** SharedMutexHelper class allows to inject specific logic when underlying shared mutex is acquired + * and released. 
+ * + * Example: + * + * class ProfileSharedMutex : public SharedMutexHelper + * { + * public: + * size_t getLockCount() const { return lock_count; } + * + * size_t getSharedLockCount() const { return shared_lock_count; } + * + * private: + * using Base = SharedMutexHelper; + * friend class SharedMutexHelper; + * + * void lockImpl() + * { + * ++lock_count; + * Base::lockImpl(); + * } + * + * void lockSharedImpl() + * { + * ++shared_lock_count; + * Base::lockSharedImpl(); + * } + * + * std::atomic lock_count = 0; + * std::atomic shared_lock_count = 0; + * }; + */ +template +class TSA_CAPABILITY("SharedMutexHelper") SharedMutexHelper +{ +public: + // Exclusive ownership + void lock() TSA_ACQUIRE() /// NOLINT + { + static_cast(this)->lockImpl(); + } + + bool try_lock() TSA_TRY_ACQUIRE(true) /// NOLINT + { + static_cast(this)->tryLockImpl(); + } + + void unlock() TSA_RELEASE() /// NOLINT + { + static_cast(this)->unlockImpl(); + } + + // Shared ownership + void lock_shared() TSA_ACQUIRE_SHARED() /// NOLINT + { + static_cast(this)->lockSharedImpl(); + } + + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true) /// NOLINT + { + static_cast(this)->tryLockSharedImpl(); + } + + void unlock_shared() TSA_RELEASE_SHARED() /// NOLINT + { + static_cast(this)->unlockSharedImpl(); + } + +protected: + void lockImpl() TSA_NO_THREAD_SAFETY_ANALYSIS + { + mutex.lock(); + } + + void tryLockImpl() TSA_NO_THREAD_SAFETY_ANALYSIS + { + mutex.try_lock(); + } + + void unlockImpl() TSA_NO_THREAD_SAFETY_ANALYSIS + { + mutex.unlock(); + } + + void lockSharedImpl() TSA_NO_THREAD_SAFETY_ANALYSIS + { + mutex.lock_shared(); + } + + void tryLockSharedImpl() TSA_NO_THREAD_SAFETY_ANALYSIS + { + mutex.try_lock_shared(); + } + + void unlockSharedImpl() TSA_NO_THREAD_SAFETY_ANALYSIS + { + mutex.unlock_shared(); + } + + MutexType mutex; +}; + +} diff --git a/src/Common/escapeString.cpp b/src/Common/escapeString.cpp new file mode 100644 index 00000000000..621726d38ac --- /dev/null +++ b/src/Common/escapeString.cpp @@ -0,0 +1,16 @@ +#include + +#include +#include + +namespace DB +{ + +String escapeString(std::string_view value) +{ + WriteBufferFromOwnString buf; + writeEscapedString(value, buf); + return buf.str(); +} + +} diff --git a/src/Common/escapeString.h b/src/Common/escapeString.h new file mode 100644 index 00000000000..0018296889c --- /dev/null +++ b/src/Common/escapeString.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +String escapeString(std::string_view value); + +} diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index eed66cca0af..dfcbf27b9fc 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -74,7 +74,7 @@ struct AsyncLoaderTest T randomInt(T from, T to) { std::uniform_int_distribution distribution(from, to); - std::scoped_lock lock(rng_mutex); + std::lock_guard lock(rng_mutex); return distribution(rng); } diff --git a/src/Core/Range.cpp b/src/Core/Range.cpp index 293c80e70ab..de88313b9f3 100644 --- a/src/Core/Range.cpp +++ b/src/Core/Range.cpp @@ -89,17 +89,14 @@ void Range::shrinkToIncludedIfPossible() } } -namespace +bool Range::equals(const Field & lhs, const Field & rhs) { - inline bool equals(const Field & lhs, const Field & rhs) - { - return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); - } + return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); +} - inline bool less(const Field & lhs, const Field & rhs) - { - return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); - } +bool 
Range::less(const Field & lhs, const Field & rhs) +{ + return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); } bool Range::empty() const diff --git a/src/Core/Range.h b/src/Core/Range.h index 89113e960be..9680107cd51 100644 --- a/src/Core/Range.h +++ b/src/Core/Range.h @@ -59,6 +59,9 @@ public: static Range createRightBounded(const FieldRef & right_point, bool right_included, bool with_null = false); static Range createLeftBounded(const FieldRef & left_point, bool left_included, bool with_null = false); + static ALWAYS_INLINE bool equals(const Field & lhs, const Field & rhs); + static ALWAYS_INLINE bool less(const Field & lhs, const Field & rhs); + /** Optimize the range. If it has an open boundary and the Field type is "loose" * - then convert it to closed, narrowing by one. * That is, for example, turn (0,2) into [1]. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e4aef9b58f7..a13b01e5dfb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -314,10 +314,6 @@ class IColumn; \ M(Bool, partial_result_on_first_cancel, false, "Allows query to return a partial result after cancel.", 0) \ \ - M(Bool, allow_experimental_partial_result, 0, "Enable experimental feature: partial results for running queries.", 0) \ - M(Milliseconds, partial_result_update_duration_ms, 0, "Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.", 0) \ - M(UInt64, max_rows_in_partial_result, 10, "Maximum rows to show in the partial result after every real-time update while the query runs (use partial result limit + OFFSET as a value in case of OFFSET in the query).", 0) \ - \ M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ /** Settings for testing hedged requests */ \ @@ -546,11 +542,13 @@ class IColumn; M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ + M(Bool, optimize_trivial_approximate_count_query, false, "Use an approximate value for trivial count optimization of storages that support such estimations.", 0) \ M(Bool, optimize_count_from_files, true, "Optimize counting rows from files in supported input formats", 0) \ M(Bool, use_cache_for_count_from_files, true, "Use cache to count the number of rows in files", 0) \ M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ + M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ @@ -812,6 +810,7 @@ class IColumn; M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ M(Bool, print_pretty_type_names, false, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ + M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -899,6 +898,7 @@ class IColumn; M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ + M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \ M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \ @@ -983,6 +983,7 @@ class IColumn; \ M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \ M(Bool, output_format_json_named_tuples_as_objects, true, "Serialize named tuple columns as JSON objects.", 0) \ + M(Bool, output_format_json_skip_null_value_in_named_tuples, false, "Skip key value pairs with null value when serialize named tuple columns as JSON objects. 
It is only valid when output_format_json_named_tuples_as_objects is true.", 0) \ M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \ M(Bool, output_format_json_validate_utf8, false, "Validate UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8", 0) \ \ @@ -1050,6 +1051,7 @@ class IColumn; \ M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \ M(ORCCompression, output_format_orc_compression_method, "lz4", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ + M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \ \ M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ @@ -1074,7 +1076,7 @@ class IColumn; M(Bool, regexp_dict_flag_case_insensitive, false, "Use case-insensitive matching for a regexp_tree dictionary. Can be overridden in individual expressions with (?i) and (?-i).", 0) \ M(Bool, regexp_dict_flag_dotall, false, "Allow '.' to match newline characters for a regexp_tree dictionary.", 0) \ \ - M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ + M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading dictionary source in several threads. It's supported only by dictionaries with local CLICKHOUSE source.", 0) \ M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \ // End of FORMAT_FACTORY_SETTINGS diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 5c9487b97d4..cbbe97eb05c 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -163,16 +163,23 @@ void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_nu && have_explicit_names) { writeChar('{', ostr); + + bool first = true; for (size_t i = 0; i < elems.size(); ++i) { - if (i != 0) - { + const auto & element_column = extractElementColumn(column, i); + if (settings.json.skip_null_value_in_named_tuples && element_column.isNullAt(row_num)) + continue; + + if (!first) writeChar(',', ostr); - } + writeJSONString(elems[i]->getElementName(), ostr, settings); writeChar(':', ostr); - elems[i]->serializeTextJSON(extractElementColumn(column, i), row_num, ostr, settings); + elems[i]->serializeTextJSON(element_column, row_num, ostr, settings); + first = false; } + writeChar('}', ostr); } else @@ -194,15 +201,24 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t && have_explicit_names) { writeCString("{\n", ostr); + + bool first = true; for (size_t i = 0; i < elems.size(); ++i) { - if (i != 0) + const auto & element_column = extractElementColumn(column, i); + if (settings.json.skip_null_value_in_named_tuples && element_column.isNullAt(row_num)) + continue; + + if (!first) writeCString(",\n", ostr); + writeChar(' ', (indent + 1) * 4, ostr); writeJSONString(elems[i]->getElementName(), ostr, settings); writeCString(": ", ostr); elems[i]->serializeTextJSONPretty(extractElementColumn(column, i), row_num, ostr, 
settings, indent + 1); + first = false; } + writeChar('\n', ostr); writeChar(' ', indent * 4, ostr); writeChar('}', ostr); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 91153f2302f..0cf1dbfd675 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -116,6 +116,8 @@ DatabaseReplicated::DatabaseReplicated( if (!db_settings.collection_name.value.empty()) fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); + + replica_group_name = context_->getConfigRef().getString("replica_group_name", ""); } String DatabaseReplicated::getFullReplicaName(const String & shard, const String & replica) @@ -175,6 +177,7 @@ void DatabaseReplicated::setCluster(ClusterPtr && new_cluster) ClusterPtr DatabaseReplicated::getClusterImpl() const { + Strings unfiltered_hosts; Strings hosts; Strings host_ids; @@ -186,11 +189,25 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const { host_ids.resize(0); Coordination::Stat stat; - hosts = zookeeper->getChildren(zookeeper_path + "/replicas", &stat); - if (hosts.empty()) + unfiltered_hosts = zookeeper->getChildren(zookeeper_path + "/replicas", &stat); + if (unfiltered_hosts.empty()) throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "No replicas of database {} found. " "It's possible if the first replica is not fully created yet " "or if the last replica was just dropped or due to logical error", zookeeper_path); + + hosts.clear(); + std::vector paths; + for (const auto & host : unfiltered_hosts) + paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group"); + + auto replica_groups = zookeeper->tryGet(paths); + + for (size_t i = 0; i < paths.size(); ++i) + { + if (replica_groups[i].data == replica_group_name) + hosts.push_back(unfiltered_hosts[i]); + } + Int32 cversion = stat.cversion; ::sort(hosts.begin(), hosts.end()); @@ -309,6 +326,7 @@ void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco: cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false); } + void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessLevel mode) { try @@ -348,6 +366,21 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL "Replica {} of shard {} of replicated database at {} already exists. Replica host ID: '{}', current host ID: '{}'", replica_name, shard_name, zookeeper_path, replica_host_id, host_id); } + + /// Check that replica_group_name in ZooKeeper matches the local one and change it if necessary. 
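// Illustrative sketch of the replica-group filtering introduced above: only replicas whose
// <replica_path>/replica_group node equals the local replica_group_name are kept, both when
// building the cluster and when collecting hosts_to_wait for distributed DDL.
// filterReplicasByGroup is a hypothetical helper, not part of the patch.
#include <Common/ZooKeeper/ZooKeeper.h>

namespace DB
{

Strings filterReplicasByGroup(
    const zkutil::ZooKeeperPtr & zookeeper,
    const String & zookeeper_path,
    const Strings & unfiltered_hosts,
    const String & replica_group_name)
{
    Strings paths;
    for (const auto & host : unfiltered_hosts)
        paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group");

    /// Multi-path tryGet returns one response per path, as used in getClusterImpl above.
    auto replica_groups = zookeeper->tryGet(paths);

    Strings filtered_hosts;
    for (size_t i = 0; i < paths.size(); ++i)
        if (replica_groups[i].data == replica_group_name)
            filtered_hosts.push_back(unfiltered_hosts[i]);

    return filtered_hosts;
}

}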
+ String zk_replica_group_name; + if (!current_zookeeper->tryGet(replica_path + "/replica_group", zk_replica_group_name)) + { + /// Replica groups were introduced in 23.10, so the node might not exist + current_zookeeper->create(replica_path + "/replica_group", replica_group_name, zkutil::CreateMode::Persistent); + if (!replica_group_name.empty()) + createEmptyLogEntry(current_zookeeper); + } + else if (zk_replica_group_name != replica_group_name) + { + current_zookeeper->set(replica_path + "/replica_group", replica_group_name, -1); + createEmptyLogEntry(current_zookeeper); + } } else if (is_create_query) { @@ -466,14 +499,17 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt { Coordination::Stat stat; String max_log_ptr_str = current_zookeeper->get(zookeeper_path + "/max_log_ptr", &stat); + Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(replica_path, host_id, zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_ptr", "0", zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/digest", "0", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/replica_group", replica_group_name, zkutil::CreateMode::Persistent)); /// In addition to creating the replica nodes, we record the max_log_ptr at the instant where /// we declared ourself as an existing replica. We'll need this during recoverLostReplica to /// notify other nodes that issued new queries while this node was recovering. ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/max_log_ptr", stat.version)); + Coordination::Responses responses; const auto code = current_zookeeper->tryMulti(ops, responses); if (code == Coordination::Error::ZOK) @@ -704,7 +740,21 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context); - Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas"); + Strings hosts_to_wait; + Strings unfiltered_hosts = getZooKeeper()->getChildren(zookeeper_path + "/replicas"); + + std::vector paths; + for (const auto & host : unfiltered_hosts) + paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group"); + + auto replica_groups = getZooKeeper()->tryGet(paths); + + for (size_t i = 0; i < paths.size(); ++i) + { + if (replica_groups[i].data == replica_group_name) + hosts_to_wait.push_back(unfiltered_hosts[i]); + } + return getDistributedDDLStatus(node_path, entry, query_context, &hosts_to_wait); } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 7ba91e48085..005180624ed 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -56,6 +56,7 @@ public: String getShardName() const { return shard_name; } String getReplicaName() const { return replica_name; } + String getReplicaGroupName() const { return replica_group_name; } String getFullReplicaName() const; static String getFullReplicaName(const String & shard, const String & replica); static std::pair parseFullReplicaName(const String & name); @@ -126,6 +127,7 @@ private: String zookeeper_path; String shard_name; String replica_name; + String replica_group_name; String replica_path; DatabaseReplicatedSettings db_settings; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 
0ffedeb58f1..5f103a52a61 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -128,7 +128,7 @@ void DatabaseReplicatedDDLWorker::initializeReplication() } std::lock_guard lock{database->metadata_mutex}; - if (!database->checkDigestValid(context)) + if (!database->checkDigestValid(context, false)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent database metadata after reconnection to ZooKeeper"); } diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index d492128f250..b40a60e0915 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -10,10 +10,10 @@ #include #include -#include +#include +#include #include -#include #include namespace ProfileEvents @@ -50,8 +50,7 @@ CacheDictionary::CacheDictionary( DictionarySourcePtr source_ptr_, CacheDictionaryStoragePtr cache_storage_ptr_, CacheDictionaryUpdateQueueConfiguration update_queue_configuration_, - DictionaryLifetime dict_lifetime_, - bool allow_read_expired_keys_) + CacheDictionaryConfiguration configuration_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} @@ -63,9 +62,8 @@ CacheDictionary::CacheDictionary( { update(unit_to_update); }) - , dict_lifetime(dict_lifetime_) + , configuration(configuration_) , log(&Poco::Logger::get("ExternalDictionaries")) - , allow_read_expired_keys(allow_read_expired_keys_) , rnd_engine(randomSeed()) { if (!source_ptr->supportsSelectiveLoad()) @@ -209,7 +207,7 @@ Columns CacheDictionary::getColumns( HashMap requested_keys_to_fetched_columns_during_update_index; MutableColumns fetched_columns_during_update = request.makeAttributesResultColumns(); - if (not_found_keys_size == 0 && expired_keys_size > 0 && allow_read_expired_keys) + if (not_found_keys_size == 0 && expired_keys_size > 0 && configuration.allow_read_expired_keys) { /// Start async update only if allow read expired keys and all keys are found update_queue.tryPushToUpdateQueueOrThrow(update_unit); @@ -314,7 +312,7 @@ ColumnUInt8::Ptr CacheDictionary::hasKeys(const Columns & k allow_expired_keys_during_aggregation = true; } - else if (not_found_keys_size == 0 && expired_keys_size > 0 && allow_read_expired_keys) + else if (not_found_keys_size == 0 && expired_keys_size > 0 && configuration.allow_read_expired_keys) { /// Start async update only if allow read expired keys and all keys are found update_queue.tryPushToUpdateQueueOrThrow(update_unit); @@ -589,7 +587,7 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtrclone(), cache_storage_ptr, update_queue.getConfiguration(), - dict_lifetime, - allow_read_expired_keys); + configuration); } DictionarySourcePtr getSource() const override; - const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } const DictionaryStructure & getStructure() const override { return dict_struct; } @@ -194,12 +200,10 @@ private: CacheDictionaryStoragePtr cache_storage_ptr; mutable CacheDictionaryUpdateQueue update_queue; - const DictionaryLifetime dict_lifetime; + const CacheDictionaryConfiguration configuration; Poco::Logger * log; - const bool allow_read_expired_keys; - mutable pcg64 rnd_engine; /// This lock is used for the inner cache state update function lock it for diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 124d4c8db3f..cfb6a0bcd37 100644 --- 
a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -59,6 +59,8 @@ public: bool hasUpdateField() const override; + bool isLocal() const { return configuration.is_local; } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index fcad8398c0b..d9a4d9ccbcf 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -9,11 +9,15 @@ #include #include +#include +#include + namespace DB { namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } @@ -130,4 +134,30 @@ String TransformWithAdditionalColumns::getName() const { return "TransformWithAdditionalColumns"; } + +DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) + : async_executor(async ? std::make_unique(pipeline_) : nullptr) + , executor(async ? nullptr : std::make_unique(pipeline_)) +{} + +bool DictionaryPipelineExecutor::pull(Block & block) +{ + if (async_executor) + { + while (true) + { + bool has_data = async_executor->pull(block); + if (has_data && !block) + continue; + return has_data; + } + } + else if (executor) + return executor->pull(block); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); +} + +DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; + } diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h index 39c6e7b3c42..a545b5cdac7 100644 --- a/src/Dictionaries/DictionarySourceHelpers.h +++ b/src/Dictionaries/DictionarySourceHelpers.h @@ -16,6 +16,10 @@ namespace DB struct DictionaryStructure; class SettingsChanges; +class PullingPipelineExecutor; +class PullingAsyncPipelineExecutor; +class QueryPipeline; + /// For simple key Block blockForIds( @@ -51,4 +55,17 @@ private: size_t current_range_index = 0; }; +/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor +class DictionaryPipelineExecutor +{ +public: + DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); + bool pull(Block & block); + + ~DictionaryPipelineExecutor(); +private: + std::unique_ptr async_executor; + std::unique_ptr executor; +}; + } diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 36a0642abce..64c7eb14024 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -366,10 +366,10 @@ Pipe DirectDictionary::read(const Names & /* column_names * template void DirectDictionary::applySettings(const Settings & settings) { - if (dynamic_cast(source_ptr.get())) + if (const auto * clickhouse_source = dynamic_cast(source_ptr.get())) { /// Only applicable for CLICKHOUSE dictionary source. 
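// Usage sketch for the new DictionaryPipelineExecutor wrapper defined above: it pulls blocks
// through either the synchronous or the asynchronous pipeline executor depending on the flag,
// which the dictionaries derive from dictionary_use_async_executor plus a local CLICKHOUSE
// source. drainDictionarySource is an illustrative name.
#include <Dictionaries/DictionarySourceHelpers.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Core/Block.h>

namespace DB
{

size_t drainDictionarySource(QueryPipeline & pipeline, bool use_async_executor)
{
    DictionaryPipelineExecutor executor(pipeline, use_async_executor);

    size_t total_rows = 0;
    Block block;
    while (executor.pull(block))
        total_rows += block.rows();   /// a real dictionary would call blockToAttributes(block) here

    return total_rows;
}

}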
- use_async_executor = settings.dictionary_use_async_executor; + use_async_executor = settings.dictionary_use_async_executor && clickhouse_source->isLocal(); } } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index b06137740da..41ff4d5399e 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -12,9 +12,9 @@ #include #include -#include #include +#include #include #include @@ -288,7 +288,7 @@ DictionaryHierarchyParentToChildIndexPtr FlatDictionary::getHierarchicalIndex() const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); - HashMap> parent_to_child; + DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; parent_to_child.reserve(element_count); UInt64 child_keys_size = static_cast(parent_keys.size()); @@ -395,7 +395,7 @@ void FlatDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); update_field_loaded_block.reset(); Block block; @@ -436,7 +436,7 @@ void FlatDictionary::loadData() if (!source_ptr->hasUpdateField()) { QueryPipeline pipeline(source_ptr->loadAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; while (executor.pull(block)) diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index d09b2c01b8e..a54916c5cd1 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -27,6 +27,7 @@ public: size_t max_array_size; bool require_nonempty; DictionaryLifetime dict_lifetime; + bool use_async_executor = false; }; FlatDictionary( diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 45525f1468b..68c347af9df 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -7,11 +7,12 @@ #include #include +#include #include +#include #include #include - namespace DB { @@ -328,7 +329,7 @@ DictionaryHierarchicalParentToChildIndexPtr HashedArrayDictionary> parent_to_child; + DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; parent_to_child.reserve(index_to_key.size()); size_t parent_keys_container_size = parent_keys_container.size(); @@ -409,7 +410,7 @@ void HashedArrayDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); update_field_loaded_block.reset(); Block block; @@ -533,12 +534,12 @@ void HashedArrayDictionary::blockToAttributes(const Block & } template -void HashedArrayDictionary::resize(size_t added_rows) +void HashedArrayDictionary::resize(size_t total_rows) { - if (unlikely(!added_rows)) + if (unlikely(!total_rows)) return; - key_attribute.container.reserve(added_rows); + key_attribute.container.reserve(total_rows); } template @@ -727,14 +728,37 @@ void HashedArrayDictionary::loadData() { QueryPipeline pipeline; pipeline = QueryPipeline(source_ptr->loadAll()); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); + + 
UInt64 pull_time_microseconds = 0; + UInt64 process_time_microseconds = 0; + + size_t total_rows = 0; + size_t total_blocks = 0; - PullingPipelineExecutor executor(pipeline); Block block; - while (executor.pull(block)) + while (true) { - resize(block.rows()); + Stopwatch watch_pull; + bool has_data = executor.pull(block); + pull_time_microseconds += watch_pull.elapsedMicroseconds(); + + if (!has_data) + break; + + ++total_blocks; + total_rows += block.rows(); + + Stopwatch watch_process; + resize(total_rows); blockToAttributes(block); + process_time_microseconds += watch_process.elapsedMicroseconds(); } + + LOG_DEBUG(&Poco::Logger::get("HashedArrayDictionary"), + "Finished {}reading {} blocks with {} rows from pipeline in {:.2f} sec and inserted into hashtable in {:.2f} sec", + configuration.use_async_executor ? "asynchronous " : "", + total_blocks, total_rows, pull_time_microseconds / 1000000.0, process_time_microseconds / 1000000.0); } else { @@ -843,6 +867,7 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr global_context, DictionarySourcePtr source_ptr, DictionaryKeyType dictionary_key_type) -> DictionaryPtr { @@ -863,6 +888,12 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime}; + ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto & settings = context->getSettingsRef(); + + const auto * clickhouse_source = dynamic_cast(source_ptr.get()); + configuration.use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; + if (dictionary_key_type == DictionaryKeyType::Simple) return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); else @@ -872,9 +903,15 @@ void registerDictionaryArrayHashed(DictionaryFactory & factory) using namespace std::placeholders; factory.registerLayout("hashed_array", - [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple); }, false); + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/) + { + return create_layout(a, b, c, d, global_context, std::move(e), DictionaryKeyType::Simple); + }, false); factory.registerLayout("complex_key_hashed_array", - [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex); }, true); + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/) + { + return create_layout(a, b, c, d, global_context, std::move(e), DictionaryKeyType::Complex); + }, true); } } diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index bcb3f85ef06..3b9446e4e8f 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -25,6 +25,7 @@ struct HashedArrayDictionaryStorageConfiguration { const bool require_nonempty; const DictionaryLifetime lifetime; + bool use_async_executor = false; }; template @@ -212,7 +213,7 @@ private: template 
void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const; - void resize(size_t added_rows); + void resize(size_t total_rows); const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index d6ee6e369c4..562857cd790 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -4,13 +4,13 @@ #include #include -#include -#include -#include #include #include #include +#include +#include #include +#include #include @@ -20,7 +20,9 @@ #include #include +#include #include +#include #include #include #include @@ -600,7 +602,7 @@ DictionaryHierarchyParentToChildIndexPtr HashedDictionary> parent_to_child; + DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; parent_to_child.reserve(size); for (const auto & map : child_key_to_parent_key_maps) @@ -709,7 +711,7 @@ void HashedDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); update_field_loaded_block.reset(); Block block; @@ -938,7 +940,7 @@ void HashedDictionary::loadData() QueryPipeline pipeline = QueryPipeline(source_ptr->loadAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; DictionaryKeysArenaHolder arena_holder; @@ -1147,6 +1149,7 @@ void registerDictionaryHashed(DictionaryFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr, + ContextPtr global_context, DictionaryKeyType dictionary_key_type, bool sparse) -> DictionaryPtr { @@ -1189,12 +1192,19 @@ void registerDictionaryHashed(DictionaryFactory & factory) if (max_load_factor < 0.5f || max_load_factor > 0.99f) throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: max_load_factor parameter should be within [0.5, 0.99], got {}", full_name, max_load_factor); + ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto & settings = context->getSettingsRef(); + + const auto * clickhouse_source = dynamic_cast(source_ptr.get()); + bool use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; + HashedDictionaryConfiguration configuration{ static_cast(shards), static_cast(shard_load_queue_backlog), max_load_factor, require_nonempty, dict_lifetime, + use_async_executor, }; if (source_ptr->hasUpdateField() && shards > 1) @@ -1239,13 +1249,13 @@ void registerDictionaryHashed(DictionaryFactory & factory) using namespace std::placeholders; factory.registerLayout("hashed", - [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple, /* sparse = */ false); }, false); + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), global_context, DictionaryKeyType::Simple, /* sparse = */ false); }, false); factory.registerLayout("sparse_hashed", - [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr 
/* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple, /* sparse = */ true); }, false); + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), global_context, DictionaryKeyType::Simple, /* sparse = */ true); }, false); factory.registerLayout("complex_key_hashed", - [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex, /* sparse = */ false); }, true); + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), global_context, DictionaryKeyType::Complex, /* sparse = */ false); }, true); factory.registerLayout("complex_key_sparse_hashed", - [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex, /* sparse = */ true); }, true); + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr global_context, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), global_context, DictionaryKeyType::Complex, /* sparse = */ true); }, true); } diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 30eecb9ab09..3302e667de4 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -28,6 +28,7 @@ struct HashedDictionaryConfiguration const float max_load_factor; const bool require_nonempty; const DictionaryLifetime lifetime; + bool use_async_executor = false; }; template diff --git a/src/Dictionaries/HierarchyDictionariesUtils.h b/src/Dictionaries/HierarchyDictionariesUtils.h index c7508ddd220..4a986153998 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.h +++ b/src/Dictionaries/HierarchyDictionariesUtils.h @@ -26,7 +26,12 @@ public: UInt32 end_index; }; - explicit DictionaryHierarchicalParentToChildIndex(const HashMap> & parent_to_children_map_) + /// By default we use initial_bytes=4096 in PodArray. + /// It might lead to really high memory consumption when arrays are almost empty but there are a lot of them. 
+ using Array = PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; + using ParentToChildIndex = HashMap; + + explicit DictionaryHierarchicalParentToChildIndex(const ParentToChildIndex & parent_to_children_map_) { size_t parent_to_children_map_size = parent_to_children_map_.size(); diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 6bb06de7506..2e3c09c67c5 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include @@ -197,13 +199,11 @@ IPAddressDictionary::IPAddressDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, - bool require_nonempty_) + IPAddressDictionary::Configuration configuration_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} - , dict_lifetime(dict_lifetime_) - , require_nonempty(require_nonempty_) + , configuration(configuration_) , access_to_key_from_attributes(dict_struct_.access_to_key_from_attributes) , logger(&Poco::Logger::get("IPAddressDictionary")) { @@ -369,7 +369,7 @@ void IPAddressDictionary::loadData() bool has_ipv6 = false; - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; while (executor.pull(block)) { @@ -525,7 +525,7 @@ void IPAddressDictionary::loadData() LOG_TRACE(logger, "{} ip records are read", ip_records.size()); - if (require_nonempty && 0 == element_count) + if (configuration.require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, "{}: dictionary source is empty and 'require_nonempty' property is set.", getFullName()); } @@ -971,7 +971,7 @@ void registerDictionaryTrie(DictionaryFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr, - ContextPtr /* global_context */, + ContextPtr global_context, bool /*created_from_ddl*/) -> DictionaryPtr { if (!dict_struct.key || dict_struct.key->size() != 1) @@ -981,8 +981,17 @@ void registerDictionaryTrie(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto * clickhouse_source = dynamic_cast(source_ptr.get()); + bool use_async_executor = clickhouse_source && clickhouse_source->isLocal() && context->getSettingsRef().dictionary_use_async_executor; + + IPAddressDictionary::Configuration configuration{ + .dict_lifetime = dict_lifetime, + .require_nonempty = require_nonempty, + .use_async_executor = use_async_executor, + }; // This is specialised dictionary for storing IPv4 and IPv6 prefixes. 
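The ip_trie registration above uses the same enabling condition for the asynchronous executor that this patch repeats for the hashed, polygon, range_hashed, regexp_tree and cache layouts. A condensed sketch of that condition with simplified stand-in types (hypothetical names, not the real ClickHouse interfaces):

#include <iostream>

struct Settings
{
    bool dictionary_use_async_executor = false;
};

struct IDictionarySource
{
    virtual ~IDictionarySource() = default;
};

struct ClickHouseDictionarySource : IDictionarySource
{
    explicit ClickHouseDictionarySource(bool is_local_) : is_local(is_local_) {}
    bool isLocal() const { return is_local; }
    bool is_local;
};

// The async executor is only enabled when the source reads from a local
// ClickHouse table and the user setting allows it; any other source keeps
// the synchronous path.
bool shouldUseAsyncExecutor(const IDictionarySource * source, const Settings & settings)
{
    const auto * clickhouse_source = dynamic_cast<const ClickHouseDictionarySource *>(source);
    return clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor;
}

int main()
{
    Settings settings;
    settings.dictionary_use_async_executor = true;

    ClickHouseDictionarySource local_source(/*is_local=*/true);
    ClickHouseDictionarySource remote_source(/*is_local=*/false);

    std::cout << std::boolalpha
              << shouldUseAsyncExecutor(&local_source, settings) << '\n'    // true
              << shouldUseAsyncExecutor(&remote_source, settings) << '\n';  // false
}

In the patch itself the result of this check lands in the use_async_executor field of each layout's configuration struct.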
- return std::make_unique(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); + return std::make_unique(dict_id, dict_struct, std::move(source_ptr), configuration); }; factory.registerLayout("ip_trie", create_layout, true); } diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 40dc5dd6782..c5b9287c2b5 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -22,12 +22,18 @@ class Arena; class IPAddressDictionary final : public IDictionary { public: + struct Configuration + { + DictionaryLifetime dict_lifetime; + bool require_nonempty; + bool use_async_executor = false; + }; + IPAddressDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, /// NOLINT - bool require_nonempty_); + Configuration configuration_); std::string getKeyDescription() const { return key_description; } @@ -53,12 +59,12 @@ public: std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty); + return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), configuration); } DictionarySourcePtr getSource() const override { return source_ptr; } - const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + const DictionaryLifetime & getLifetime() const override { return configuration.dict_lifetime; } const DictionaryStructure & getStructure() const override { return dict_struct; } @@ -199,8 +205,7 @@ private: DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; - const DictionaryLifetime dict_lifetime; - const bool require_nonempty; + const Configuration configuration; const bool access_to_key_from_attributes; const std::string key_description{dict_struct.getKeyDescription()}; diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index ad36608343d..df3ae439b00 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -231,7 +232,7 @@ void IPolygonDictionary::loadData() { QueryPipeline pipeline(source_ptr->loadAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; while (executor.pull(block)) blockToAttributes(block); diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h index d4864acdec4..a856d12b66c 100644 --- a/src/Dictionaries/PolygonDictionary.h +++ b/src/Dictionaries/PolygonDictionary.h @@ -56,6 +56,8 @@ public: /// Store polygon key column. That will allow to read columns from polygon dictionary. 
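Before the remaining configuration members, a sketch of the load loop that the PolygonDictionary::loadData() hunk above (and every other loadData()/updateData() touched by this patch) switches from PullingPipelineExecutor to DictionaryPipelineExecutor. The classes below are simplified stand-ins rather than the real QueryPipeline, Block, or executor types; the real executor presumably selects a synchronous or asynchronous pulling implementation from the use_async_executor flag, while the caller's pull loop keeps the same shape.

#include <iostream>
#include <optional>
#include <vector>

// Hypothetical stand-ins for Block and QueryPipeline.
using Block = std::vector<int>;

struct PipelineStub
{
    int remaining_blocks = 3;

    // Produces a fixed number of blocks, then signals end-of-stream.
    std::optional<Block> next()
    {
        if (remaining_blocks == 0)
            return std::nullopt;
        --remaining_blocks;
        return Block{1, 2, 3};
    }
};

// Facade with the same pull() contract whether blocks are produced
// synchronously or by a background thread.
class DictionaryPipelineExecutorSketch
{
public:
    DictionaryPipelineExecutorSketch(PipelineStub & pipeline_, bool use_async_)
        : pipeline(pipeline_), use_async(use_async_) {}

    bool pull(Block & block)
    {
        // The real class would delegate to an async pulling executor when
        // use_async is set; only the synchronous path is modelled here.
        if (use_async)
            std::cerr << "(async path not modelled in this sketch)\n";

        auto next = pipeline.next();
        if (!next)
            return false;
        block = std::move(*next);
        return true;
    }

private:
    PipelineStub & pipeline;
    bool use_async;
};

int main()
{
    PipelineStub pipeline;
    DictionaryPipelineExecutorSketch executor(pipeline, /*use_async=*/false);

    Block block;
    while (executor.pull(block))             // same loop shape as loadData()
        std::cout << "loaded a block of " << block.size() << " rows\n";
}

Only the executor construction differs between the two modes; the per-block processing code in the dictionaries is left untouched by the patch.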
bool store_polygon_key_column = false; + + bool use_async_executor = false; }; IPolygonDictionary( diff --git a/src/Dictionaries/PolygonDictionaryImplementations.cpp b/src/Dictionaries/PolygonDictionaryImplementations.cpp index e33b3c13c19..3feca2ec410 100644 --- a/src/Dictionaries/PolygonDictionaryImplementations.cpp +++ b/src/Dictionaries/PolygonDictionaryImplementations.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include @@ -161,7 +163,7 @@ DictionaryPtr createLayout(const std::string & , const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr, - ContextPtr /* global_context */, + ContextPtr global_context, bool /*created_from_ddl*/) { const String database = config.getString(config_prefix + ".database", ""); @@ -219,11 +221,16 @@ DictionaryPtr createLayout(const std::string & , config.keys(layout_prefix, keys); const auto & dict_prefix = layout_prefix + "." + keys.front(); + ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto * clickhouse_source = dynamic_cast(source_ptr.get()); + bool use_async_executor = clickhouse_source && clickhouse_source->isLocal() && context->getSettingsRef().dictionary_use_async_executor; + IPolygonDictionary::Configuration configuration { .input_type = input_type, .point_type = point_type, - .store_polygon_key_column = config.getBool(dict_prefix + ".store_polygon_key_column", false) + .store_polygon_key_column = config.getBool(dict_prefix + ".store_polygon_key_column", false), + .use_async_executor = use_async_executor, }; if (dict_struct.range_min || dict_struct.range_max) diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 3f9bad941d5..624a57d65b5 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -29,7 +29,9 @@ #include #include +#include #include +#include namespace DB @@ -56,6 +58,7 @@ struct RangeHashedDictionaryConfiguration bool convert_null_range_bound_to_open; RangeHashedDictionaryLookupStrategy lookup_strategy; bool require_nonempty; + bool use_async_executor = false; }; template @@ -655,7 +658,7 @@ void RangeHashedDictionary::loadData() if (!source_ptr->hasUpdateField()) { QueryPipeline pipeline(source_ptr->loadAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; while (executor.pull(block)) @@ -919,7 +922,7 @@ void RangeHashedDictionary::updateData() if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) { QueryPipeline pipeline(source_ptr->loadUpdatedAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); update_field_loaded_block.reset(); Block block; diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 7d6ad61c691..bbd101d55aa 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -310,7 +310,7 @@ void RegExpTreeDictionary::loadData() if (!source_ptr->hasUpdateField()) { QueryPipeline pipeline(source_ptr->loadAll()); - PullingPipelineExecutor executor(pipeline); + DictionaryPipelineExecutor executor(pipeline, configuration.use_async_executor); Block block; while (executor.pull(block)) @@ -867,12 +867,17 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory) String 
dictionary_layout_prefix = config_prefix + ".layout" + ".regexp_tree"; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - RegExpTreeDictionary::Configuration configuration{ - .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false), .lifetime = dict_lifetime}; - const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto * clickhouse_source = typeid_cast(source_ptr.get()); + bool use_async_executor = clickhouse_source && clickhouse_source->isLocal() && context->getSettingsRef().dictionary_use_async_executor; + + RegExpTreeDictionary::Configuration configuration{ + .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false), + .lifetime = dict_lifetime, + .use_async_executor = use_async_executor, + }; return std::make_unique( dict_id, diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h index 62008bb5aae..6597584ed45 100644 --- a/src/Dictionaries/RegExpTreeDictionary.h +++ b/src/Dictionaries/RegExpTreeDictionary.h @@ -40,6 +40,7 @@ public: { bool require_nonempty; DictionaryLifetime lifetime; + bool use_async_executor = false; }; const std::string name = "RegExpTree"; diff --git a/src/Dictionaries/registerCacheDictionaries.cpp b/src/Dictionaries/registerCacheDictionaries.cpp index 0a68f5859d8..b79261955ff 100644 --- a/src/Dictionaries/registerCacheDictionaries.cpp +++ b/src/Dictionaries/registerCacheDictionaries.cpp @@ -2,7 +2,10 @@ #include "CacheDictionaryStorage.h" #include "SSDCacheDictionaryStorage.h" #include + +#include #include +#include #include namespace DB @@ -222,6 +225,16 @@ DictionaryPtr createCacheDictionaryLayout( storage = std::make_shared>(storage_configuration); } #endif + ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto & settings = context->getSettingsRef(); + + const auto * clickhouse_source = dynamic_cast(source_ptr.get()); + bool use_async_executor = clickhouse_source && clickhouse_source->isLocal() && settings.dictionary_use_async_executor; + CacheDictionaryConfiguration configuration{ + allow_read_expired_keys, + dict_lifetime, + use_async_executor, + }; auto dictionary = std::make_unique>( dictionary_identifier, @@ -229,8 +242,7 @@ DictionaryPtr createCacheDictionaryLayout( std::move(source_ptr), std::move(storage), update_queue_configuration, - dict_lifetime, - allow_read_expired_keys); + configuration); return dictionary; } diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp index 93784a0709e..4e20abfdb79 100644 --- a/src/Dictionaries/registerRangeHashedDictionary.cpp +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -16,6 +16,7 @@ static DictionaryPtr createRangeHashedDictionary(const std::string & full_name, const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr global_context, DictionarySourcePtr source_ptr) { static constexpr auto layout_name = dictionary_key_type == DictionaryKeyType::Simple ? 
"range_hashed" : "complex_key_range_hashed"; @@ -52,11 +53,16 @@ static DictionaryPtr createRangeHashedDictionary(const std::string & full_name, else if (range_lookup_strategy == "max") lookup_strategy = RangeHashedDictionaryLookupStrategy::max; + auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); + const auto * clickhouse_source = dynamic_cast(source_ptr.get()); + bool use_async_executor = clickhouse_source && clickhouse_source->isLocal() && context->getSettingsRef().dictionary_use_async_executor; + RangeHashedDictionaryConfiguration configuration { .convert_null_range_bound_to_open = convert_null_range_bound_to_open, .lookup_strategy = lookup_strategy, - .require_nonempty = require_nonempty + .require_nonempty = require_nonempty, + .use_async_executor = use_async_executor, }; DictionaryPtr result = std::make_unique>( @@ -76,10 +82,10 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr, - ContextPtr /* global_context */, + ContextPtr global_context, bool /*created_from_ddl*/) -> DictionaryPtr { - return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); + return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, global_context, std::move(source_ptr)); }; factory.registerLayout("range_hashed", create_layout_simple, false); @@ -89,10 +95,10 @@ void registerDictionaryRangeHashed(DictionaryFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, DictionarySourcePtr source_ptr, - ContextPtr /* context */, + ContextPtr global_context, bool /*created_from_ddl*/) -> DictionaryPtr { - return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, std::move(source_ptr)); + return createRangeHashedDictionary(full_name, dict_struct, config, config_prefix, global_context, std::move(source_ptr)); }; factory.registerLayout("complex_key_range_hashed", create_layout_complex, true); diff --git a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp index 10d335ebbbc..ea8b7accfa1 100644 --- a/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp +++ b/src/Dictionaries/tests/gtest_hierarchy_dictionaries_utils.cpp @@ -173,7 +173,7 @@ TEST(HierarchyDictionariesUtils, getIsInHierarchy) TEST(HierarchyDictionariesUtils, getDescendants) { { - HashMap> parent_to_child; + DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; parent_to_child[0].emplace_back(1); parent_to_child[1].emplace_back(2); parent_to_child[1].emplace_back(3); @@ -221,7 +221,7 @@ TEST(HierarchyDictionariesUtils, getDescendants) } } { - HashMap> parent_to_child; + DictionaryHierarchicalParentToChildIndex::ParentToChildIndex parent_to_child; parent_to_child[1].emplace_back(2); parent_to_child[2].emplace_back(1); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 2713b7cb35f..920a3cb1bb5 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -100,6 +100,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.array_of_rows = settings.output_format_json_array_of_rows; format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes; format_settings.json.write_named_tuples_as_objects = 
settings.output_format_json_named_tuples_as_objects; + format_settings.json.skip_null_value_in_named_tuples = settings.output_format_json_skip_null_value_in_named_tuples; format_settings.json.read_named_tuples_as_objects = settings.input_format_json_named_tuples_as_objects; format_settings.json.defaults_for_missing_elements_in_named_tuple = settings.input_format_json_defaults_for_missing_elements_in_named_tuple; format_settings.json.ignore_unknown_keys_in_named_tuple = settings.input_format_json_ignore_unknown_keys_in_named_tuple; @@ -193,7 +194,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; format_settings.orc.output_compression_method = settings.output_format_orc_compression_method; + format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride; format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; + format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 655aaa81d35..77fd8e1fcbd 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -188,6 +188,7 @@ struct FormatSettings bool escape_forward_slashes = true; bool read_named_tuples_as_objects = false; bool write_named_tuples_as_objects = false; + bool skip_null_value_in_named_tuples = false; bool defaults_for_missing_elements_in_named_tuple = false; bool ignore_unknown_keys_in_named_tuple = false; bool serialize_as_strings = false; @@ -363,6 +364,8 @@ struct FormatSettings bool output_string_as_string = false; ORCCompression output_compression_method = ORCCompression::NONE; bool use_fast_decoder = true; + bool filter_push_down = true; + UInt64 output_row_index_stride = 10'000; } orc; /// For capnProto format we should determine how to diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 4444feb6129..b8c0d27c42e 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -8,7 +8,9 @@ #include #include #include +#include +#include #include #include @@ -16,7 +18,9 @@ #include #include +#include #include +#include namespace DB @@ -47,312 +51,345 @@ struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16 scale) { Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); return DateTime64(DecimalUtils::multiplyAdd(t.value, multiplier, delta)); } - - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, const 
DateLUTImpl &, UInt16) { Int64 multiplier = DecimalUtils::scaleMultiplier(9); return DateTime64(DecimalUtils::multiplyAdd(static_cast(t), multiplier, delta)); } - - static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, const DateLUTImpl &, UInt16) { throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date"); } - - static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, const DateLUTImpl &, UInt16) { throw Exception(ErrorCodes::LOGICAL_ERROR, "addNanoseconds() cannot be used with Date32"); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddMicrosecondsImpl { static constexpr auto name = "addMicroseconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16 scale) { Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); return DateTime64(scale <= 6 ? DecimalUtils::multiplyAdd(t.value, multiplier, delta) : DecimalUtils::multiplyAdd(delta, multiplier, t.value)); } - - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { Int64 multiplier = DecimalUtils::scaleMultiplier(6); return DateTime64(DecimalUtils::multiplyAdd(static_cast(t), multiplier, delta)); } - - static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, const DateLUTImpl &, UInt16) { throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date"); } - - static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, const DateLUTImpl &, UInt16) { throw Exception(ErrorCodes::LOGICAL_ERROR, "addMicroseconds() cannot be used with Date32"); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddMillisecondsImpl { static constexpr auto name = "addMilliseconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16 scale) { Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); return DateTime64(scale <= 3 ? 
DecimalUtils::multiplyAdd(t.value, multiplier, delta) : DecimalUtils::multiplyAdd(delta, multiplier, t.value)); } - - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { Int64 multiplier = DecimalUtils::scaleMultiplier(3); return DateTime64(DecimalUtils::multiplyAdd(static_cast(t), multiplier, delta)); } - - static inline NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int8 execute(UInt16, Int64, const DateLUTImpl &, const DateLUTImpl &, UInt16) { throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date"); } - - static inline NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int8 execute(Int32, Int64, const DateLUTImpl &, const DateLUTImpl &, UInt16) { throw Exception(ErrorCodes::LOGICAL_ERROR, "addMilliseconds() cannot be used with Date32"); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddSecondsImpl { static constexpr auto name = "addSeconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16 scale) { return DateTime64(DecimalUtils::multiplyAdd(delta, DecimalUtils::scaleMultiplier(scale), t.value)); } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return static_cast(t + delta); } - - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { // use default datetime64 scale - static_assert(DataTypeDateTime64::default_scale == 3, ""); + static_assert(DataTypeDateTime64::default_scale == 3); return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.fromDayNum(DayNum(d)) + delta); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddMinutesImpl { static constexpr auto name = "addMinutes"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16 
scale) { return t + 60 * delta * DecimalUtils::scaleMultiplier(scale); } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return static_cast(t + delta * 60); } - - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { // use default datetime64 scale - static_assert(DataTypeDateTime64::default_scale == 3, ""); + static_assert(DataTypeDateTime64::default_scale == 3); return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.fromDayNum(DayNum(d)) + delta * 60); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddHoursImpl { static constexpr auto name = "addHours"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16 scale) { return t + 3600 * delta * DecimalUtils::scaleMultiplier(scale); } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return static_cast(t + delta * 3600); } - - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { // use default datetime64 scale - static_assert(DataTypeDateTime64::default_scale == 3, ""); + static_assert(DataTypeDateTime64::default_scale == 3); return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.fromDayNum(DayNum(d)) + delta * 3600); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddDaysImpl { static constexpr auto name = "addDays"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, 
const DateLUTImpl &, UInt16 scale) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); return time_zone.addDays(d.quot, delta) * multiplier + d.rem; } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.addDays(t, delta)); } - - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return d + delta; } - - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return static_cast(d + delta); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddWeeksImpl { static constexpr auto name = "addWeeks"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16 scale) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem; } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.addWeeks(t, delta)); } - - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return static_cast(d + delta * 7); } - - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, const DateLUTImpl &, UInt16) { return static_cast(d + delta * 7); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16 scale) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); return time_zone.addMonths(d.quot, delta) * multiplier + d.rem; } - - static inline NO_SANITIZE_UNDEFINED UInt32 
execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.addMonths(t, delta)); } - - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return time_zone.addMonths(DayNum(d), delta); } - - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return time_zone.addMonths(ExtendedDayNum(d), delta); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddQuartersImpl { static constexpr auto name = "addQuarters"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16 scale) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem; } - - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return static_cast(time_zone.addQuarters(t, delta)); } - - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return time_zone.addQuarters(DayNum(d), delta); } - - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return time_zone.addQuarters(ExtendedDayNum(d), delta); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + static NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16 scale) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); return time_zone.addYears(d.quot, delta) * multiplier + d.rem; } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, 
UInt16) { return static_cast(time_zone.addYears(t, delta)); } - - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return time_zone.addYears(DayNum(d), delta); } - - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl &, UInt16) { return time_zone.addYears(ExtendedDayNum(d), delta); } + static DateTime64 execute(std::string_view s, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) + { + ReadBufferFromString buf(s); + DateTime64 t; + parseDateTime64BestEffort(t, scale, buf, time_zone, utc_time_zone); + return execute(t, delta, time_zone, utc_time_zone, scale); + } }; template @@ -361,10 +398,10 @@ struct SubtractIntervalImpl : public Transform using Transform::Transform; template - inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const + NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone, UInt16 scale) const { /// Signed integer overflow is Ok. - return Transform::execute(t, -delta, time_zone, scale); + return Transform::execute(t, -delta, time_zone, utc_time_zone, scale); } }; @@ -382,52 +419,67 @@ struct SubtractYearsImpl : SubtractIntervalImpl { static constexpr template -struct Adder +struct Processor { const Transform transform; - explicit Adder(Transform transform_) + explicit Processor(Transform transform_) : transform(std::move(transform_)) {} - template - void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const + template + void NO_INLINE vectorConstant(const FromColumnType & col_from, ToColumnType & col_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { - size_t size = vec_from.size(); - vec_to.resize(size); + static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); - for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale); + if constexpr (std::is_same_v) + { + const auto & offsets_from = col_from.getOffsets(); + auto & vec_to = col_to.getData(); + + size_t size = offsets_from.size(); + vec_to.resize(size); + + for (size_t i = 0 ; i < size; ++i) + { + std::string_view from = col_from.getDataAt(i).toView(); + vec_to[i] = transform.execute(from, checkOverflow(delta), time_zone, utc_time_zone, scale); + } + } + else + { + const auto & vec_from = col_from.getData(); + auto & vec_to = col_to.getData(); + + size_t size = vec_from.size(); + vec_to.resize(size); + + for (size_t i = 0; i < size; ++i) + vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, utc_time_zone, scale); + } } - template - void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const + template + void vectorVector(const FromColumnType & col_from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { - size_t size = vec_from.size(); - vec_to.resize(size); - castTypeToEither< ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, 
ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; }); + &delta, [&](const auto & column){ vectorVector(col_from, col_to, column, time_zone, scale); return true; }); } - template - void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const + template + void constantVector(const FromType & from, ToColumnType & col_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { - size_t size = delta.size(); - vec_to.resize(size); - castTypeToEither< ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; }); + &delta, [&](const auto & column){ constantVector(from, col_to, column, time_zone, scale); return true; }); } private: - template static Int64 checkOverflow(Value val) { @@ -437,20 +489,52 @@ private: throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); } - template + template NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( - const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const + const FromColumnType & col_from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const { - for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale); + static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); + + if constexpr (std::is_same_v) + { + const auto & offsets_from = col_from.getOffsets(); + auto & vec_to = col_to.getData(); + + size_t size = offsets_from.size(); + vec_to.resize(size); + + for (size_t i = 0 ; i < size; ++i) + { + std::string_view from = col_from.getDataAt(i).toView(); + vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); + } + } + else + { + const auto & vec_from = col_from.getData(); + auto & vec_to = col_to.getData(); + + size_t size = vec_from.size(); + vec_to.resize(size); + + for (size_t i = 0; i < size; ++i) + vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); + } } - template + template NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( - const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const + const FromType & from, ToColumnType & col_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale) const { + static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); + + auto & vec_to = col_to.getData(); + + size_t size = delta.size(); + vec_to.resize(size); + for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale); + vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, utc_time_zone, scale); } }; @@ -458,34 +542,34 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, 
const DataTypePtr & result_type, UInt16 scale) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; using ToColumnType = typename ToDataType::ColumnType; - auto op = Adder{std::move(transform)}; + const IColumn & source_column = *arguments[0].column; + const IColumn & delta_column = *arguments[1].column; const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - const ColumnPtr source_col = arguments[0].column; - auto result_col = result_type->createColumn(); auto col_to = assert_cast(result_col.get()); - const IColumn & delta_column = *arguments[1].column; - if (const auto * sources = checkAndGetColumn(source_col.get())) + auto processor = Processor{std::move(transform)}; + + if (const auto * sources = checkAndGetColumn(&source_column)) { if (const auto * delta_const_column = typeid_cast(&delta_column)) - op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale); + processor.vectorConstant(*sources, *col_to, delta_const_column->getInt(0), time_zone, scale); else - op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale); + processor.vectorVector(*sources, *col_to, delta_column, time_zone, scale); } - else if (const auto * sources_const = checkAndGetColumnConst(source_col.get())) + else if (const auto * sources_const = checkAndGetColumnConst(&source_column)) { - op.constantVector( + processor.constantVector( sources_const->template getValue(), - col_to->getData(), delta_column, time_zone, scale); + *col_to, delta_column, time_zone, scale); } else { @@ -506,7 +590,7 @@ template <> struct ResultDataTypeMap { using ResultDataType = DataTy template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDate32; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime64; }; template <> struct ResultDataTypeMap { using ResultDataType = DataTypeDateTime64; }; -template <> struct ResultDataTypeMap { using ResultDataType = DataTypeInt8; }; // error +template <> struct ResultDataTypeMap { using ResultDataType = DataTypeInt8; }; // error } template @@ -516,10 +600,7 @@ public: static constexpr auto name = Transform::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -532,30 +613,28 @@ public: "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", getName(), arguments.size()); - if (!isNativeNumber(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} (delta) must be a number", - getName()); - if (arguments.size() == 2) { - if (!isDate(arguments[0].type) && !isDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type) && !isString(arguments[0].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. 
" - "Should be a date or a date with time", arguments[0].type->getName(), getName()); + "Must be a date, a date with time or a String", arguments[0].type->getName(), getName()); } else { - if (!WhichDataType(arguments[0].type).isDateTime() - || !WhichDataType(arguments[2].type).isString()) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} supports 2 or 3 arguments. " - "The 1st argument must be of type Date or DateTime. " - "The 2nd argument must be a number. " - "The 3rd argument (optional) must be a constant string with timezone name. " + if (!WhichDataType(arguments[0].type).isDateTime()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Must be a DateTime", arguments[0].type->getName(), getName()); + + if (!WhichDataType(arguments[2].type).isString()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}. " + "The 3rd argument must be a constant string with a timezone name. " "The timezone argument is allowed only when the 1st argument has the type DateTime", - getName()); - } + arguments[2].type->getName(), getName()); } + if (!isNativeNumber(arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be a number", getName()); + switch (arguments[0].type->getTypeId()) { case TypeIndex::Date: @@ -566,18 +645,18 @@ public: return resolveReturnType(arguments); case TypeIndex::DateTime64: return resolveReturnType(arguments); + case TypeIndex::String: + return resolveReturnType(arguments); default: - { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid type of 1st argument of function {}: " "{}, expected: Date, DateTime or DateTime64.", getName(), arguments[0].type->getName()); - } } } /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// e.g. addSeconds(Date, 1) => DateTime template - using TransformExecuteReturnType = decltype(std::declval().execute(FieldType(), 0, std::declval(), 0)); + using TransformExecuteReturnType = decltype(std::declval().execute(FieldType(), 0, std::declval(), std::declval(), 0)); // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl). // e.g. 
for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return, @@ -591,17 +670,11 @@ public: using ResultDataType = TransformResultDataType; if constexpr (std::is_same_v) - { return std::make_shared(); - } else if constexpr (std::is_same_v) - { return std::make_shared(); - } else if constexpr (std::is_same_v) - { return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); - } else if constexpr (std::is_same_v) { static constexpr auto target_scale = std::invoke( @@ -627,9 +700,7 @@ public: return std::make_shared(target_scale.value_or(DataTypeDateTime64::default_scale), std::move(timezone)); } else if constexpr (std::is_same_v) - { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot be used with {}", getName(), arguments[0].type->getName()); - } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type in datetime add interval function"); } @@ -643,29 +714,21 @@ public: WhichDataType which(from_type); if (which.isDate()) - { - return DateTimeAddIntervalImpl, Transform>::execute( - Transform{}, arguments, result_type); - } + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); else if (which.isDate32()) - { - return DateTimeAddIntervalImpl, Transform>::execute( - Transform{}, arguments, result_type); - } + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); else if (which.isDateTime()) + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, 0); + else if (which.isDateTime64()) { - return DateTimeAddIntervalImpl, Transform>::execute( - Transform{}, arguments, result_type); - } - else if (const auto * datetime64_type = assert_cast(from_type)) - { + const auto * datetime64_type = assert_cast(from_type); auto from_scale = datetime64_type->getScale(); - return DateTimeAddIntervalImpl, Transform>::execute( - Transform{}, arguments, result_type, from_scale); + return DateTimeAddIntervalImpl, Transform>::execute(Transform{}, arguments, result_type, from_scale); } + else if (which.isString()) + return DateTimeAddIntervalImpl::execute(Transform{}, arguments, result_type, 3); else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", - arguments[0].type->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", arguments[0].type->getName(), getName()); } }; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index cf60eea547b..69c5aa48155 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -89,6 +89,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; + extern const int CANNOT_PARSE_BOOL; } @@ -1683,7 +1684,25 @@ struct ConvertImplGenericFromString const auto & val = col_from_string->getDataAt(i); ReadBufferFromMemory read_buffer(val.data, val.size); - serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); + try + { + serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); + } + catch (const Exception & e) + { + auto * nullable_column = typeid_cast(&column_to); + if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) + { + auto & col_nullmap = nullable_column->getNullMapData(); + if (col_nullmap.size() != nullable_column->size()) + 
col_nullmap.resize_fill(nullable_column->size()); + if (nullable_column->size() == (i + 1)) + nullable_column->popBack(1); + nullable_column->insertDefault(); + continue; + } + throw; + } if (!read_buffer.eof()) { @@ -4177,15 +4196,21 @@ private: { if constexpr (std::is_same_v) { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, + input_format_ipv4_default_on_conversion_error_value, + requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t) -> ColumnPtr { if (!WhichDataType(result_type).isIPv4()) throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value || requested_result_is_nullable) + if (requested_result_is_nullable) + return convertToIPv4(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value) return convertToIPv4(arguments[0].column, null_map); else return convertToIPv4(arguments[0].column, null_map); @@ -4196,16 +4221,22 @@ private: if constexpr (std::is_same_v) { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, + input_format_ipv6_default_on_conversion_error_value, + requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t) -> ColumnPtr { if (!WhichDataType(result_type).isIPv6()) throw Exception( ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName()); const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; - if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value || requested_result_is_nullable) + if (requested_result_is_nullable) + return convertToIPv6(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value) return convertToIPv6(arguments[0].column, null_map); else return convertToIPv6(arguments[0].column, null_map); @@ -4216,7 +4247,18 @@ private: if (to_type->getCustomSerialization() && to_type->getCustomName()) { - ret = &ConvertImplGenericFromString::execute; + ret = [requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t input_rows_count) -> ColumnPtr + { + auto wrapped_result_type = result_type; + if (requested_result_is_nullable) + wrapped_result_type = makeNullable(result_type); + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + }; return true; } } @@ -4231,7 +4273,9 @@ private: ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (cast_ipv4_ipv6_default_on_conversion_error_value || requested_result_is_nullable) + if (requested_result_is_nullable) + return convertIPv6ToIPv4(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value) return convertIPv6ToIPv4(arguments[0].column, null_map); else return convertIPv6ToIPv4(arguments[0].column, null_map); diff --git a/src/Functions/FunctionsOpDate.cpp b/src/Functions/FunctionsOpDate.cpp index dcbc1fc1256..997513109d6 100644 --- a/src/Functions/FunctionsOpDate.cpp +++ b/src/Functions/FunctionsOpDate.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -22,7 +23,6 @@ public: explicit FunctionOpDate(ContextPtr context_) : context(context_) {} - static FunctionPtr create(ContextPtr context) { return std::make_shared>(context); } String getName() const override { return name; } @@ -32,19 +32,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of 1st argument of function {}. Should be a date or a date with time", - arguments[0].type->getName(), - getName()); - - if (!isInterval(arguments[1].type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of 2nd argument of function {}. 
Should be an interval", - arguments[1].type->getName(), - getName()); + FunctionArgumentDescriptors args{ + {"date", &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date or date with time"}, + {"interval", &isInterval, nullptr, "Interval"} + }; + validateFunctionArgumentTypes(*this, arguments, args); auto op = FunctionFactory::instance().get(Op::internal_name, context); auto op_build = op->build(arguments); diff --git a/src/Functions/SubtractSubSeconds.cpp b/src/Functions/SubtractSubSeconds.cpp deleted file mode 100644 index b1c47700d13..00000000000 --- a/src/Functions/SubtractSubSeconds.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - - -namespace DB -{ - -using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; -REGISTER_FUNCTION(SubtractNanoseconds) -{ - factory.registerFunction(); -} - -using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; -REGISTER_FUNCTION(SubtractMicroseconds) -{ - factory.registerFunction(); -} - -using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; -REGISTER_FUNCTION(SubtractMilliseconds) -{ - factory.registerFunction(); -} - -} - - diff --git a/src/Functions/addMicroseconds.cpp b/src/Functions/addMicroseconds.cpp new file mode 100644 index 00000000000..0dcd6b4452f --- /dev/null +++ b/src/Functions/addMicroseconds.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; +REGISTER_FUNCTION(AddMicroseconds) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/addMilliseconds.cpp b/src/Functions/addMilliseconds.cpp new file mode 100644 index 00000000000..0e2b696d367 --- /dev/null +++ b/src/Functions/addMilliseconds.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; +REGISTER_FUNCTION(AddMilliseconds) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/addNanoseconds.cpp b/src/Functions/addNanoseconds.cpp new file mode 100644 index 00000000000..93eadc814d9 --- /dev/null +++ b/src/Functions/addNanoseconds.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; +REGISTER_FUNCTION(AddNanoseconds) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp deleted file mode 100644 index fa901ad4dcd..00000000000 --- a/src/Functions/addSubSeconds.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - - -namespace DB -{ - -using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; -REGISTER_FUNCTION(AddNanoseconds) -{ - factory.registerFunction(); -} - -using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; -REGISTER_FUNCTION(AddMicroseconds) -{ - factory.registerFunction(); -} - -using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; -REGISTER_FUNCTION(AddMilliseconds) -{ - factory.registerFunction(); -} - -} - - diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp index 94ed5d59ca9..b5b650e7289 100644 --- a/src/Functions/array/arrayFold.cpp +++ b/src/Functions/array/arrayFold.cpp @@ -30,37 +30,37 @@ public: void getLambdaArgumentTypes(DataTypes & arguments) const override { if (arguments.size() < 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator argument", getName()); + throw 
Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator", getName()); - DataTypes nested_types(arguments.size() - 1); - for (size_t i = 0; i < nested_types.size() - 1; ++i) + DataTypes accumulator_and_array_types(arguments.size() - 1); + accumulator_and_array_types[0] = arguments.back(); + for (size_t i = 1; i < accumulator_and_array_types.size(); ++i) { - const auto * array_type = checkAndGetDataType(&*arguments[i + 1]); + const auto * array_type = checkAndGetDataType(&*arguments[i]); if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array, found {} instead", i + 2, getName(), arguments[i + 1]->getName()); - nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be of type Array, found {} instead", i + 1, getName(), arguments[i]->getName()); + accumulator_and_array_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); } - nested_types[nested_types.size() - 1] = arguments[arguments.size() - 1]; - const auto * function_type = checkAndGetDataType(arguments[0].get()); - if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for this overload of {} must be a function with {} arguments, found {} instead.", - getName(), nested_types.size(), arguments[0]->getName()); + const auto * lambda_function_type = checkAndGetDataType(arguments[0].get()); + if (!lambda_function_type || lambda_function_type->getArgumentTypes().size() != accumulator_and_array_types.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must be a lambda function with {} arguments, found {} instead.", + getName(), accumulator_and_array_types.size(), arguments[0]->getName()); - arguments[0] = std::make_shared(nested_types); + arguments[0] = std::make_shared(accumulator_and_array_types); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least 2 arguments, passed: {}.", getName(), arguments.size()); + if (arguments.size() < 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator", getName()); - const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); - if (!data_type_function) + const auto * lambda_function_type = checkAndGetDataType(arguments[0].type.get()); + if (!lambda_function_type) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName()); auto accumulator_type = arguments.back().type; - auto lambda_type = data_type_function->getReturnType(); + auto lambda_type = lambda_function_type->getReturnType(); if (!accumulator_type->equals(*lambda_type)) throw Exception(ErrorCodes::TYPE_MISMATCH, "Return type of lambda function must be the same as the accumulator type, inferred return type of lambda: {}, inferred type of accumulator: {}", @@ -71,12 +71,12 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto & lambda_with_type_and_name = arguments[0]; + const auto & 
lambda_function_with_type_and_name = arguments[0]; - if (!lambda_with_type_and_name.column) + if (!lambda_function_with_type_and_name.column) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName()); - const auto * lambda_function = typeid_cast(lambda_with_type_and_name.column.get()); + const auto * lambda_function = typeid_cast(lambda_function_with_type_and_name.column.get()); if (!lambda_function) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName()); @@ -85,6 +85,7 @@ public: const ColumnArray * column_first_array = nullptr; ColumnsWithTypeAndName arrays; arrays.reserve(arguments.size() - 1); + /// Validate input types and get input array columns in convenient form for (size_t i = 1; i < arguments.size() - 1; ++i) { @@ -131,8 +132,7 @@ public: if (rows_count == 0) return arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - ColumnPtr current_column; - current_column = arguments.back().column->convertToFullColumnIfConst(); + ColumnPtr current_column = arguments.back().column->convertToFullColumnIfConst(); MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); size_t max_array_size = 0; @@ -198,9 +198,9 @@ public: auto res_lambda = lambda_function->cloneResized(prev[1]->size()); auto * res_lambda_ptr = typeid_cast(res_lambda.get()); + res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); for (size_t i = 0; i < array_count; i++) res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); - res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); current_column = IColumn::mutate(res_lambda_ptr->reduce().column); prev_size = current_column->size(); diff --git a/src/Functions/currentProfiles.cpp b/src/Functions/currentProfiles.cpp index eee458f4f63..71b0eda5f26 100644 --- a/src/Functions/currentProfiles.cpp +++ b/src/Functions/currentProfiles.cpp @@ -17,39 +17,46 @@ namespace { enum class Kind { - CURRENT_PROFILES, - ENABLED_PROFILES, - DEFAULT_PROFILES, + currentProfiles, + enabledProfiles, + defaultProfiles, }; - template - class FunctionCurrentProfiles : public IFunction + String toString(Kind kind) + { + switch (kind) + { + case Kind::currentProfiles: return "currentProfiles"; + case Kind::enabledProfiles: return "enabledProfiles"; + case Kind::defaultProfiles: return "defaultProfiles"; + } + } + + class FunctionProfiles : public IFunction { public: - static constexpr auto name = (kind == Kind::CURRENT_PROFILES) ? "currentProfiles" : ((kind == Kind::ENABLED_PROFILES) ? 
"enabledProfiles" : "defaultProfiles"); - static FunctionPtr create(const ContextPtr & context) { return std::make_shared(context); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override + { + return false; + } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + String getName() const override + { + return toString(kind); + } - String getName() const override { return name; } - - explicit FunctionCurrentProfiles(const ContextPtr & context) + explicit FunctionProfiles(const ContextPtr & context, Kind kind_) + : kind(kind_) { const auto & manager = context->getAccessControl(); std::vector profile_ids; - if constexpr (kind == Kind::CURRENT_PROFILES) + + switch (kind) { - profile_ids = context->getCurrentProfiles(); - } - else if constexpr (kind == Kind::ENABLED_PROFILES) - { - profile_ids = context->getEnabledProfiles(); - } - else - { - static_assert(kind == Kind::DEFAULT_PROFILES); - profile_ids = context->getUser()->settings.toProfileIDs(); + case Kind::currentProfiles: profile_ids = context->getCurrentProfiles(); break; + case Kind::enabledProfiles: profile_ids = context->getEnabledProfiles(); break; + case Kind::defaultProfiles: profile_ids = context->getUser()->settings.toProfileIDs(); break; } profile_names = manager.tryReadNames(profile_ids); @@ -75,15 +82,16 @@ namespace } private: + Kind kind; Strings profile_names; }; } -REGISTER_FUNCTION(CurrentProfiles) +REGISTER_FUNCTION(Profiles) { - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction("currentProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::currentProfiles)); }); + factory.registerFunction("enabledProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::enabledProfiles)); }); + factory.registerFunction("defaultProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::defaultProfiles)); }); } } diff --git a/src/Functions/jsonMergePatch.cpp b/src/Functions/jsonMergePatch.cpp new file mode 100644 index 00000000000..ff790ba86b4 --- /dev/null +++ b/src/Functions/jsonMergePatch.cpp @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "config.h" + +#if USE_RAPIDJSON + +#include "rapidjson/document.h" +#include "rapidjson/writer.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/prettywriter.h" +#include "rapidjson/filereadstream.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + // select jsonMergePatch('{"a":1}','{"name": "joey"}','{"name": "tom"}','{"name": "zoey"}'); + // || + // \/ + // ┌───────────────────────┐ + // │ {"a":1,"name":"zoey"} │ + // └───────────────────────┘ + class FunctionjsonMergePatch : public IFunction + { + public: + static constexpr auto name = "jsonMergePatch"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + bool isVariadic() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { 
return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); + + for (const auto & arg : arguments) + if (!isString(arg.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} requires string arguments", getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + chassert(!arguments.empty()); + + rapidjson::Document::AllocatorType allocator; + std::function merge_objects; + + merge_objects = [&merge_objects, &allocator](rapidjson::Value & dest, const rapidjson::Value & src) -> void + { + if (!src.IsObject()) + return; + + for (auto it = src.MemberBegin(); it != src.MemberEnd(); ++it) + { + rapidjson::Value key(it->name, allocator); + rapidjson::Value value(it->value, allocator); + if (dest.HasMember(key)) + { + if (dest[key].IsObject() && value.IsObject()) + merge_objects(dest[key], value); + else + dest[key] = value; + } + else + { + dest.AddMember(key, value, allocator); + } + } + }; + + auto parse_json_document = [](const ColumnString & column, rapidjson::Document & document, size_t i) + { + auto str_ref = column.getDataAt(i); + const char * json = str_ref.data; + + document.Parse(json); + if (document.HasParseError() || !document.IsObject()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong JSON string to merge. Expected JSON object"); + }; + + const auto * first_string = typeid_cast(arguments[0].column.get()); + if (!first_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be strings", getName()); + + std::vector merged_jsons; + merged_jsons.reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + auto & merged_json = merged_jsons.emplace_back(rapidjson::Type::kObjectType, &allocator); + parse_json_document(*first_string, merged_json, i); + } + + for (size_t col_idx = 1; col_idx < arguments.size(); ++col_idx) + { + const auto * column_string = typeid_cast(arguments[col_idx].column.get()); + if (!column_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Arguments of function {} must be strings", getName()); + + for (size_t i = 0; i < input_rows_count; ++i) + { + rapidjson::Document document(&allocator); + parse_json_document(*column_string, document, i); + merge_objects(merged_jsons[i], document); + } + } + + auto result = ColumnString::create(); + auto & result_string = assert_cast(*result); + rapidjson::CrtAllocator buffer_allocator; + + for (size_t i = 0; i < input_rows_count; ++i) + { + rapidjson::StringBuffer buffer(&buffer_allocator); + rapidjson::Writer writer(buffer); + + merged_jsons[i].Accept(writer); + result_string.insertData(buffer.GetString(), buffer.GetSize()); + } + + return result; + } + }; + +} + +REGISTER_FUNCTION(jsonMergePatch) +{ + factory.registerFunction(FunctionDocumentation{ + .description="Returns the merged JSON object string, which is formed by merging multiple JSON objects."}); +} + +} + +#endif diff --git a/src/Functions/subtractMicroseconds.cpp b/src/Functions/subtractMicroseconds.cpp new file mode 100644 index 00000000000..7a274a5021d --- /dev/null +++ b/src/Functions/subtractMicroseconds.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +using 
FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; +REGISTER_FUNCTION(SubtractMicroseconds) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/subtractMilliseconds.cpp b/src/Functions/subtractMilliseconds.cpp new file mode 100644 index 00000000000..a563e18b6bc --- /dev/null +++ b/src/Functions/subtractMilliseconds.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; +REGISTER_FUNCTION(SubtractMilliseconds) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/subtractNanoseconds.cpp b/src/Functions/subtractNanoseconds.cpp new file mode 100644 index 00000000000..fffb4eae37a --- /dev/null +++ b/src/Functions/subtractNanoseconds.cpp @@ -0,0 +1,16 @@ +#include +#include + + +namespace DB +{ + +using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; +REGISTER_FUNCTION(SubtractNanoseconds) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index fa2400a08a5..ad41e8afd65 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1139,7 +1139,7 @@ template void readDateTextFallback(LocalDate &, ReadBuffer &); template bool readDateTextFallback(LocalDate &, ReadBuffer &); -template +template ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) { static constexpr bool throw_exception = std::is_same_v; @@ -1155,11 +1155,30 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D char * s_pos = s; /** Read characters, that could represent unix timestamp. - * Only unix timestamp of at least 5 characters is supported. + * Only unix timestamp of at least 5 characters is supported by default, exception is thrown for a shorter one + * (unless parsing a string like '1.23' or '-12': there is no ambiguity, it is a DT64 timestamp). * Then look at 5th character. If it is a number - treat whole as unix timestamp. * If it is not a number - then parse datetime in YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format. */ + int negative_multiplier = 1; + + if (!buf.eof() && *buf.position() == '-') + { + if constexpr (dt64_mode) + { + negative_multiplier = -1; + ++buf.position(); + } + else + { + if constexpr (throw_exception) + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime"); + else + return false; + } + } + /// A piece similar to unix timestamp, maybe scaled to subsecond precision. while (s_pos < s + date_time_broken_down_length && !buf.eof() && isNumericASCII(*buf.position())) { @@ -1169,7 +1188,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D } /// 2015-01-01 01:02:03 or 2015-01-01 - if (s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position())) + /// if negative, it is a timestamp with no ambiguity + if (negative_multiplier == 1 && s_pos == s + 4 && !buf.eof() && !isNumericASCII(*buf.position())) { const auto already_read_length = s_pos - s; const size_t remaining_date_size = date_broken_down_length - already_read_length; @@ -1220,27 +1240,34 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D } else { - if (s_pos - s >= 5) + datetime = 0; + bool too_short = s_pos - s <= 4; + + if (!too_short || dt64_mode) { /// Not very efficient. 
- datetime = 0; for (const char * digit_pos = s; digit_pos < s_pos; ++digit_pos) datetime = datetime * 10 + *digit_pos - '0'; } - else + datetime *= negative_multiplier; + + if (too_short && negative_multiplier != -1) { if constexpr (throw_exception) - throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime"); + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime"); else return false; } + } return ReturnType(true); } -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); void skipJSONField(ReadBuffer & buf, StringRef name_of_field) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 415f4e100d2..40f812050db 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -909,15 +909,28 @@ inline T parseFromString(std::string_view str) } -template +template ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut); /** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone. * As an exception, also supported parsing of unix timestamp in form of decimal number. */ -template +template inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) { + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (!dt64_mode) + { + if (!buf.eof() && !isNumericASCII(*buf.position())) + { + if constexpr (throw_exception) + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime"); + else + return false; + } + } + /// Optimistic path, when whole value is in buffer. const char * s = buf.position(); @@ -965,19 +978,30 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons return readIntTextImpl(datetime, buf); } else - return readDateTimeTextFallback(datetime, buf, date_lut); + return readDateTimeTextFallback(datetime, buf, date_lut); } template inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) { - time_t whole; - if (!readDateTimeTextImpl(whole, buf, date_lut)) + time_t whole = 0; + bool is_negative_timestamp = (!buf.eof() && *buf.position() == '-'); + bool is_empty = buf.eof(); + + if (!is_empty) { - return ReturnType(false); + try + { + readDateTimeTextImpl(whole, buf, date_lut); + } + catch (const DB::ParsingException & exception) + { + if (buf.eof() || *buf.position() != '.') + throw exception; + } } - int negative_multiplier = 1; + int negative_fraction_multiplier = 1; DB::DecimalUtils::DecimalComponents components{static_cast(whole), 0}; @@ -1005,18 +1029,18 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re while (!buf.eof() && isNumericASCII(*buf.position())) ++buf.position(); - /// Fractional part (subseconds) is treated as positive by users - /// (as DateTime64 itself is a positive, although underlying decimal is negative) - /// setting fractional part to be negative when whole is 0 results in wrong value, - /// so we multiply result by -1. 
- if (components.whole < 0 && components.fractional != 0) + /// Fractional part (subseconds) is treated as positive by users, but represented as a negative number. + /// E.g. `1925-12-12 13:14:15.123` is represented internally as timestamp `-1390214744.877`. + /// Thus need to convert . to .<1-0.> + /// Also, setting fractional part to be negative when whole is 0 results in wrong value, in this case multiply result by -1. + if (!is_negative_timestamp && components.whole < 0 && components.fractional != 0) { const auto scale_multiplier = DecimalUtils::scaleMultiplier(scale); ++components.whole; components.fractional = scale_multiplier - components.fractional; if (!components.whole) { - negative_multiplier = -1; + negative_fraction_multiplier = -1; } } } @@ -1032,13 +1056,13 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re bool is_ok = true; if constexpr (std::is_same_v) { - datetime64 = DecimalUtils::decimalFromComponents(components, scale) * negative_multiplier; + datetime64 = DecimalUtils::decimalFromComponents(components, scale) * negative_fraction_multiplier; } else { is_ok = DecimalUtils::tryGetDecimalFromComponents(components, scale, datetime64); if (is_ok) - datetime64 *= negative_multiplier; + datetime64 *= negative_fraction_multiplier; } return ReturnType(is_ok); diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index e16064db713..ac0ebc44bec 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2334,29 +2334,6 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va return block; } -Block Aggregator::prepareBlockAndFillWithoutKeySnapshot(AggregatedDataVariants & data_variants) const -{ - size_t rows = 1; - bool final = true; - - auto && out_cols - = prepareOutputBlockColumns(params, aggregate_functions, getHeader(final), data_variants.aggregates_pools, final, rows); - auto && [key_columns, raw_key_columns, aggregate_columns, final_aggregate_columns, aggregate_columns_data] = out_cols; - - AggregatedDataWithoutKey & data = data_variants.without_key; - - /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. - for (size_t insert_i = 0; insert_i < params.aggregates_size; ++insert_i) - aggregate_functions[insert_i]->insertResultInto( - data + offsets_of_aggregate_states[insert_i], - *final_aggregate_columns[insert_i], - data_variants.aggregates_pool); - - Block block = finalizeBlock(params, getHeader(final), std::move(out_cols), final, rows); - - return block; -} - template Aggregator::ConvertToBlockRes Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 4acf73ce50f..ab53f76d2ce 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1217,7 +1217,6 @@ private: friend class ConvertingAggregatedToChunksSource; friend class ConvertingAggregatedToChunksWithMergingSource; friend class AggregatingInOrderTransform; - friend class AggregatingPartialResultTransform; /// Data structure of source blocks. 
Block header; @@ -1402,7 +1401,6 @@ private: std::atomic * is_cancelled = nullptr) const; Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const; - Block prepareBlockAndFillWithoutKeySnapshot(AggregatedDataVariants & data_variants) const; BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const; template diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d0b3e9d1990..cec24f20cd1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -181,7 +182,7 @@ struct ContextSharedPart : boost::noncopyable Poco::Logger * log = &Poco::Logger::get("Context"); /// For access of most of shared objects. - mutable SharedMutex mutex; + mutable ContextSharedMutex mutex; /// Separate mutex for access of dictionaries. Separate mutex to avoid locks when server doing request to itself. mutable std::mutex embedded_dictionaries_mutex; mutable std::mutex external_dictionaries_mutex; @@ -193,48 +194,46 @@ struct ContextSharedPart : boost::noncopyable /// Separate mutex for re-initialization of zookeeper session. This operation could take a long time and must not interfere with another operations. mutable std::mutex zookeeper_mutex; - mutable zkutil::ZooKeeperPtr zookeeper; /// Client for ZooKeeper. - ConfigurationPtr zookeeper_config; /// Stores zookeeper configs + mutable zkutil::ZooKeeperPtr zookeeper TSA_GUARDED_BY(zookeeper_mutex); /// Client for ZooKeeper. + ConfigurationPtr zookeeper_config TSA_GUARDED_BY(zookeeper_mutex); /// Stores zookeeper configs #if USE_NURAFT mutable std::mutex keeper_dispatcher_mutex; - mutable std::shared_ptr keeper_dispatcher; + mutable std::shared_ptr keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex); #endif mutable std::mutex auxiliary_zookeepers_mutex; - mutable std::map auxiliary_zookeepers; /// Map for auxiliary ZooKeeper clients. - ConfigurationPtr auxiliary_zookeepers_config; /// Stores auxiliary zookeepers configs + mutable std::map auxiliary_zookeepers TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Map for auxiliary ZooKeeper clients. + ConfigurationPtr auxiliary_zookeepers_config TSA_GUARDED_BY(auxiliary_zookeepers_mutex); /// Stores auxiliary zookeepers configs + /// No lock required for interserver_io_host, interserver_io_port, interserver_scheme modified only during initialization String interserver_io_host; /// The host name by which this server is available for other servers. UInt16 interserver_io_port = 0; /// and port. String interserver_scheme; /// http or https MultiVersion interserver_io_credentials; - String path; /// Path to the data directory, with a slash at the end. - String flags_path; /// Path to the directory with some control flags for server maintenance. - String user_files_path; /// Path to the directory with user provided files, usable by 'file' table function. - String dictionaries_lib_path; /// Path to the directory with user provided binaries and libraries for external dictionaries. - String user_scripts_path; /// Path to the directory with user provided scripts. - String filesystem_caches_path; /// Path to the directory with filesystem caches. - ConfigurationPtr config; /// Global configuration settings. - - String tmp_path; /// Path to the temporary files that occur when processing the request. 
+ String path TSA_GUARDED_BY(mutex); /// Path to the data directory, with a slash at the end. + String flags_path TSA_GUARDED_BY(mutex); /// Path to the directory with some control flags for server maintenance. + String user_files_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided files, usable by 'file' table function. + String dictionaries_lib_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided binaries and libraries for external dictionaries. + String user_scripts_path TSA_GUARDED_BY(mutex); /// Path to the directory with user provided scripts. + String filesystem_caches_path TSA_GUARDED_BY(mutex); /// Path to the directory with filesystem caches. + ConfigurationPtr config TSA_GUARDED_BY(mutex); /// Global configuration settings. + String tmp_path TSA_GUARDED_BY(mutex); /// Path to the temporary files that occur when processing the request. /// All temporary files that occur when processing the requests accounted here. /// Child scopes for more fine-grained accounting are created per user/query/etc. /// Initialized once during server startup. - TemporaryDataOnDiskScopePtr root_temp_data_on_disk; + TemporaryDataOnDiskScopePtr root_temp_data_on_disk TSA_GUARDED_BY(mutex); - mutable std::unique_ptr embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. - mutable std::unique_ptr external_dictionaries_loader; + mutable std::unique_ptr embedded_dictionaries TSA_GUARDED_BY(embedded_dictionaries_mutex); /// Metrica's dictionaries. Have lazy initialization. + mutable std::unique_ptr external_dictionaries_loader TSA_GUARDED_BY(external_dictionaries_mutex); - scope_guard models_repository_guard; + ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository TSA_GUARDED_BY(external_dictionaries_mutex) = nullptr; + scope_guard dictionaries_xmls TSA_GUARDED_BY(external_dictionaries_mutex); - ExternalLoaderXMLConfigRepository * external_dictionaries_config_repository = nullptr; - scope_guard dictionaries_xmls; - - mutable std::unique_ptr external_user_defined_executable_functions_loader; - ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository = nullptr; - scope_guard user_defined_executable_functions_xmls; + mutable std::unique_ptr external_user_defined_executable_functions_loader TSA_GUARDED_BY(external_user_defined_executable_functions_mutex); + ExternalLoaderXMLConfigRepository * user_defined_executable_functions_config_repository TSA_GUARDED_BY(external_user_defined_executable_functions_mutex) = nullptr; + scope_guard user_defined_executable_functions_xmls TSA_GUARDED_BY(external_user_defined_executable_functions_mutex); mutable OnceFlag user_defined_sql_objects_loader_initialized; mutable std::unique_ptr user_defined_sql_objects_loader; @@ -250,29 +249,30 @@ struct ContextSharedPart : boost::noncopyable mutable OnceFlag backups_worker_initialized; std::optional backups_worker; + /// No lock required for default_profile_name, system_profile_name, buffer_profile_name modified only during initialization String default_profile_name; /// Default profile name used for default values. 
String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying - std::unique_ptr access_control; + std::unique_ptr access_control TSA_GUARDED_BY(mutex); mutable OnceFlag resource_manager_initialized; mutable ResourceManagerPtr resource_manager; - mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. - mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. + mutable UncompressedCachePtr uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks. + mutable MarkCachePtr mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files. mutable OnceFlag load_marks_threadpool_initialized; mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. mutable OnceFlag prefetch_threadpool_initialized; mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. - mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. - mutable QueryCachePtr query_cache; /// Cache of query results. - mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. - mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. + mutable UncompressedCachePtr index_uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks for MergeTree indices. + mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results. + mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices. + mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. ProcessList process_list; /// Executing queries at the moment. SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) ReplicatedFetchList replicated_fetch_list; - ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. + ConfigurationPtr users_config TSA_GUARDED_BY(mutex); /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. OnceFlag buffer_flush_schedule_pool_initialized; @@ -304,67 +304,72 @@ struct ContextSharedPart : boost::noncopyable mutable ThrottlerPtr backups_server_throttler; /// A server-wide throttler for BACKUPs MultiVersion macros; /// Substitutions extracted from config. - std::unique_ptr ddl_worker; /// Process ddl commands from zk. + std::unique_ptr ddl_worker TSA_GUARDED_BY(mutex); /// Process ddl commands from zk. /// Rules for selecting the compression settings, depending on the size of the part. 
- mutable std::unique_ptr compression_codec_selector; + mutable std::unique_ptr compression_codec_selector TSA_GUARDED_BY(mutex); /// Storage disk chooser for MergeTree engines - mutable std::shared_ptr merge_tree_disk_selector; + mutable std::shared_ptr merge_tree_disk_selector TSA_GUARDED_BY(storage_policies_mutex); /// Storage policy chooser for MergeTree engines - mutable std::shared_ptr merge_tree_storage_policy_selector; + mutable std::shared_ptr merge_tree_storage_policy_selector TSA_GUARDED_BY(storage_policies_mutex); ServerSettings server_settings; - std::optional merge_tree_settings; /// Settings of MergeTree* engines. - std::optional replicated_merge_tree_settings; /// Settings of ReplicatedMergeTree* engines. + std::optional merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of MergeTree* engines. + std::optional replicated_merge_tree_settings TSA_GUARDED_BY(mutex); /// Settings of ReplicatedMergeTree* engines. std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default) std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default) + /// No lock required for format_schema_path modified only during initialization String format_schema_path; /// Path to a directory that contains schema files used by input formats. mutable OnceFlag action_locks_manager_initialized; ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers OnceFlag system_logs_initialized; - std::unique_ptr system_logs; /// Used to log queries and operations on parts - std::optional storage_s3_settings; /// Settings of S3 storage - std::vector warnings; /// Store warning messages about server configuration. + std::unique_ptr system_logs TSA_GUARDED_BY(mutex); /// Used to log queries and operations on parts + std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage + std::vector warnings TSA_GUARDED_BY(mutex); /// Store warning messages about server configuration. /// Background executors for *MergeTree tables - MergeMutateBackgroundExecutorPtr merge_mutate_executor; - OrdinaryBackgroundExecutorPtr moves_executor; - OrdinaryBackgroundExecutorPtr fetch_executor; - OrdinaryBackgroundExecutorPtr common_executor; + /// Has background executors for MergeTree tables been initialized? 
+ mutable ContextSharedMutex background_executors_mutex; + bool are_background_executors_initialized TSA_GUARDED_BY(background_executors_mutex) = false; + MergeMutateBackgroundExecutorPtr merge_mutate_executor TSA_GUARDED_BY(background_executors_mutex); + OrdinaryBackgroundExecutorPtr moves_executor TSA_GUARDED_BY(background_executors_mutex); + OrdinaryBackgroundExecutorPtr fetch_executor TSA_GUARDED_BY(background_executors_mutex); + OrdinaryBackgroundExecutorPtr common_executor TSA_GUARDED_BY(background_executors_mutex); - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml - HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml + RemoteHostFilter remote_host_filter TSA_GUARDED_BY(mutex); /// Allowed URL from config.xml + HTTPHeaderFilter http_header_filter TSA_GUARDED_BY(mutex); /// Forbidden HTTP headers from config.xml + /// No lock required for trace_collector modified only during initialization std::optional trace_collector; /// Thread collecting traces from threads executing queries /// Clusters for distributed tables /// Initialized on demand (on distributed storages initialization) since Settings should be initialized - std::shared_ptr clusters; - ConfigurationPtr clusters_config; /// Stores updated configs - std::unique_ptr cluster_discovery; mutable std::mutex clusters_mutex; /// Guards clusters, clusters_config and cluster_discovery + std::shared_ptr clusters TSA_GUARDED_BY(clusters_mutex); + ConfigurationPtr clusters_config TSA_GUARDED_BY(clusters_mutex); /// Stores updated configs + std::unique_ptr cluster_discovery TSA_GUARDED_BY(clusters_mutex); + /// No lock required for async_insert_queue modified only during initialization std::shared_ptr async_insert_queue; + std::map server_ports; - bool shutdown_called = false; + std::atomic shutdown_called = false; - /// Has background executors for MergeTree tables been initialized? - bool are_background_executors_initialized = false; - - Stopwatch uptime_watch; + Stopwatch uptime_watch TSA_GUARDED_BY(mutex); + /// No lock required for application_type modified only during initialization Context::ApplicationType application_type = Context::ApplicationType::SERVER; /// vector of xdbc-bridge commands, they will be killed when Context will be destroyed - std::vector> bridge_commands; + std::vector> bridge_commands TSA_GUARDED_BY(mutex); + /// No lock required for config_reload_callback, start_servers_callback, stop_servers_callback modified only during initialization Context::ConfigReloadCallback config_reload_callback; - Context::StartStopServersCallback start_servers_callback; Context::StartStopServersCallback stop_servers_callback; - bool is_server_completely_started = false; + bool is_server_completely_started TSA_GUARDED_BY(mutex) = false; ContextSharedPart() : access_control(std::make_unique()) @@ -480,14 +485,34 @@ struct ContextSharedPart : boost::noncopyable } } + void setConfig(const ConfigurationPtr & config_value) + { + if (!config_value) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set nullptr config is invalid"); + + std::lock_guard lock(mutex); + config = config_value; + access_control->setExternalAuthenticatorsConfig(*config_value); + } + + const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::lock_guard &) const TSA_REQUIRES(this->mutex) + { + return config ? *config : Poco::Util::Application::instance().config(); + } + + const Poco::Util::AbstractConfiguration & getConfigRef() const + { + SharedLockGuard lock(mutex); + return config ? 
*config : Poco::Util::Application::instance().config(); + } /** Perform a complex job of destroying objects in advance. */ - void shutdown() + void shutdown() TSA_NO_THREAD_SAFETY_ANALYSIS { - if (shutdown_called) + bool is_shutdown_called = shutdown_called.exchange(true); + if (is_shutdown_called) return; - shutdown_called = true; /// Need to flush the async insert queue before shutting down the database catalog async_insert_queue.reset(); @@ -537,7 +562,7 @@ struct ContextSharedPart : boost::noncopyable /// Cause it can call Context::getZooKeeper and resurrect it. { - auto lock = std::lock_guard(mutex); + std::lock_guard lock(mutex); delete_ddl_worker = std::move(ddl_worker); } @@ -554,7 +579,7 @@ struct ContextSharedPart : boost::noncopyable cache->cache->deactivateBackgroundOperations(); { - auto lock = std::lock_guard(mutex); + std::lock_guard lock(mutex); /** Compiled expressions stored in cache need to be destroyed before destruction of static objects. * Because CHJIT instance can be static object. @@ -578,7 +603,6 @@ struct ContextSharedPart : boost::noncopyable /// but at least they can be preserved for storage termination. dictionaries_xmls.reset(); user_defined_executable_functions_xmls.reset(); - models_repository_guard.reset(); delete_system_logs = std::move(system_logs); delete_embedded_dictionaries = std::move(embedded_dictionaries); @@ -628,7 +652,7 @@ struct ContextSharedPart : boost::noncopyable trace_collector.emplace(std::move(trace_log)); } - void addWarningMessage(const String & message) + void addWarningMessage(const String & message) TSA_REQUIRES(mutex) { /// A warning goes both: into server's log; stored to be placed in `system.warnings` table. log->warning(message); @@ -660,6 +684,23 @@ struct ContextSharedPart : boost::noncopyable } }; +void ContextSharedMutex::lockImpl() +{ + ProfileEvents::increment(ProfileEvents::ContextLock); + CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; + Stopwatch watch; + Base::lockImpl(); + ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); +} + +void ContextSharedMutex::lockSharedImpl() +{ + ProfileEvents::increment(ProfileEvents::ContextLock); + CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; + Stopwatch watch; + Base::lockSharedImpl(); + ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); +} ContextData::ContextData() = default; ContextData::ContextData(const ContextData &) = default; @@ -698,7 +739,7 @@ SharedContextHolder Context::createShared() ContextMutablePtr Context::createCopy(const ContextPtr & other) { - auto lock = other->getLocalSharedLock(); + SharedLockGuard lock(other->mutex); return std::shared_ptr(new Context(*other)); } @@ -720,46 +761,6 @@ Context::~Context() = default; InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; } const InterserverIOHandler & Context::getInterserverIOHandler() const { return shared->interserver_io_handler; } -std::unique_lock Context::getGlobalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getGlobalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - 
CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(shared->mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::unique_lock Context::getLocalLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::unique_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - -std::shared_lock Context::getLocalSharedLock() const -{ - ProfileEvents::increment(ProfileEvents::ContextLock); - CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - Stopwatch watch; - auto lock = std::shared_lock(mutex); - ProfileEvents::increment(ProfileEvents::ContextLockWaitMicroseconds, watch.elapsedMicroseconds()); - return lock; -} - ProcessList & Context::getProcessList() { return shared->process_list; } const ProcessList & Context::getProcessList() const { return shared->process_list; } OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; } @@ -783,37 +784,37 @@ String Context::resolveDatabase(const String & database_name) const String Context::getPath() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->path; } String Context::getFlagsPath() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->flags_path; } String Context::getUserFilesPath() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->user_files_path; } String Context::getDictionariesLibPath() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->dictionaries_lib_path; } String Context::getUserScriptsPath() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->user_scripts_path; } String Context::getFilesystemCachesPath() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->filesystem_caches_path; } @@ -821,7 +822,7 @@ Strings Context::getWarnings() const { Strings common_warnings; { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); common_warnings = shared->warnings; } /// Make setting's name ordered @@ -856,7 +857,7 @@ Strings Context::getWarnings() const /// TODO: remove, use `getTempDataOnDisk` VolumePtr Context::getGlobalTemporaryVolume() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); /// Calling this method we just bypass the `temp_data_on_disk` and write to the file on the volume directly. /// Volume is the same for `root_temp_data_on_disk` (always set) and `temp_data_on_disk` (if it's set). 
if (shared->root_temp_data_on_disk) @@ -869,13 +870,13 @@ TemporaryDataOnDiskScopePtr Context::getTempDataOnDisk() const if (temp_data_on_disk) return temp_data_on_disk; - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->root_temp_data_on_disk; } TemporaryDataOnDiskScopePtr Context::getSharedTempDataOnDisk() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->root_temp_data_on_disk; } @@ -888,7 +889,7 @@ void Context::setTempDataOnDisk(TemporaryDataOnDiskScopePtr temp_data_on_disk_) void Context::setPath(const String & path) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->path = path; @@ -910,7 +911,7 @@ void Context::setPath(const String & path) void Context::setFilesystemCachesPath(const String & path) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!fs::path(path).is_absolute()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem caches path must be absolute: {}", path); @@ -958,7 +959,7 @@ static VolumePtr createLocalSingleDiskVolume(const std::string & path, const Poc void Context::setTemporaryStoragePath(const String & path, size_t max_size) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->root_temp_data_on_disk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary storage is already set"); @@ -967,7 +968,7 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size) if (!shared->tmp_path.ends_with('/')) shared->tmp_path += '/'; - VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRefWithLock(lock)); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, shared->getConfigRefWithLock(lock)); for (const auto & disk : volume->getDisks()) { @@ -1015,7 +1016,7 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s setupTmpPath(shared->log, disk->getPath()); } - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->root_temp_data_on_disk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary storage is already set"); @@ -1029,7 +1030,7 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t if (!disk_ptr) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Disk '{}' is not found", cache_disk_name); - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->root_temp_data_on_disk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary storage is already set"); @@ -1040,38 +1041,39 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath()); shared->tmp_path = file_cache->getBasePath(); - VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRefWithLock(lock)); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, shared->getConfigRefWithLock(lock)); shared->root_temp_data_on_disk = std::make_shared(volume, file_cache.get(), max_size); } void Context::setFlagsPath(const String & path) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->flags_path = path; } void Context::setUserFilesPath(const String & path) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->user_files_path = path; } void Context::setDictionariesLibPath(const String & path) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->dictionaries_lib_path = path; } 
void Context::setUserScriptsPath(const String & path) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->user_scripts_path = path; } void Context::addWarningMessage(const String & msg) const { - auto lock = getGlobalLock(); - auto suppress_re = getConfigRefWithLock(lock).getString("warning_supress_regexp", ""); + std::lock_guard lock(shared->mutex); + auto suppress_re = shared->getConfigRefWithLock(lock).getString("warning_supress_regexp", ""); + bool is_supressed = !suppress_re.empty() && re2::RE2::PartialMatch(msg, suppress_re); if (!is_supressed) shared->addWarningMessage(msg); @@ -1079,58 +1081,48 @@ void Context::addWarningMessage(const String & msg) const void Context::setConfig(const ConfigurationPtr & config) { - if (!config) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Set nullptr config is invalid"); - - auto lock = getGlobalLock(); - shared->config = config; - shared->access_control->setExternalAuthenticatorsConfig(*shared->config); -} - -const Poco::Util::AbstractConfiguration & Context::getConfigRefWithLock(const std::unique_lock &) const -{ - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); + shared->setConfig(config); } const Poco::Util::AbstractConfiguration & Context::getConfigRef() const { - auto lock = getGlobalSharedLock(); - return shared->config ? *shared->config : Poco::Util::Application::instance().config(); + return shared->getConfigRef(); } - AccessControl & Context::getAccessControl() { + SharedLockGuard lock(shared->mutex); return *shared->access_control; } const AccessControl & Context::getAccessControl() const { + SharedLockGuard lock(shared->mutex); return *shared->access_control; } void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->access_control->setExternalAuthenticatorsConfig(config); } std::unique_ptr Context::makeGSSAcceptorContext() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return std::make_unique(shared->access_control->getExternalAuthenticators().getKerberosParams()); } void Context::setUsersConfig(const ConfigurationPtr & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->users_config = config; shared->access_control->setUsersConfig(*shared->users_config); } ConfigurationPtr Context::getUsersConfig() { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->users_config; } @@ -1150,7 +1142,7 @@ void Context::setUser(const UUID & user_id_, const std::optionalgetUserName(); } -void Context::setUserIDWithLock(const UUID & user_id_, const std::unique_lock &) +void Context::setUserIDWithLock(const UUID & user_id_, const std::lock_guard &) { user_id = user_id_; need_recalculate_access = true; @@ -1183,17 +1175,17 @@ void Context::setUserIDWithLock(const UUID & user_id_, const std::unique_lock Context::getUserID() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); return user_id; } -void Context::setCurrentRolesWithLock(const std::vector & current_roles_, const std::unique_lock &) +void Context::setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard &) { if (current_roles_.empty()) current_roles = nullptr; @@ -1204,7 +1196,7 @@ void Context::setCurrentRolesWithLock(const std::vector & current_roles_, void Context::setCurrentRoles(const std::vector & current_roles_) { - auto lock = getLocalLock(); + 
std::lock_guard lock(mutex); setCurrentRolesWithLock(current_roles_, lock); } @@ -1264,7 +1256,7 @@ std::shared_ptr Context::getAccess() const std::optional params; { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); if (access && !need_recalculate_access) return access; /// No need to recalculate access rights. @@ -1284,7 +1276,7 @@ std::shared_ptr Context::getAccess() const { /// If the parameters of access rights were not changed while we were calculated them /// then we store the new access rights in the Context to allow reusing it later. - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (get_params() == *params) { access = res; @@ -1312,7 +1304,7 @@ std::optional Context::getQuotaUsage() const return getAccess()->getQuotaUsage(); } -void Context::setCurrentProfileWithLock(const String & profile_name, bool check_constraints, const std::unique_lock & lock) +void Context::setCurrentProfileWithLock(const String & profile_name, bool check_constraints, const std::lock_guard & lock) { try { @@ -1326,13 +1318,13 @@ void Context::setCurrentProfileWithLock(const String & profile_name, bool check_ } } -void Context::setCurrentProfileWithLock(const UUID & profile_id, bool check_constraints, const std::unique_lock & lock) +void Context::setCurrentProfileWithLock(const UUID & profile_id, bool check_constraints, const std::lock_guard & lock) { auto profile_info = getAccessControl().getSettingsProfileInfo(profile_id); setCurrentProfilesWithLock(*profile_info, check_constraints, lock); } -void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::unique_lock & lock) +void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard & lock) { if (check_constraints) checkSettingsConstraintsWithLock(profiles_info.settings, SettingSource::PROFILE); @@ -1342,31 +1334,31 @@ void Context::setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_i void Context::setCurrentProfile(const String & profile_name, bool check_constraints) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); setCurrentProfileWithLock(profile_name, check_constraints, lock); } void Context::setCurrentProfile(const UUID & profile_id, bool check_constraints) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); setCurrentProfileWithLock(profile_id, check_constraints, lock); } void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); setCurrentProfilesWithLock(profiles_info, check_constraints, lock); } std::vector Context::getCurrentProfiles() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); return settings_constraints_and_current_profiles->current_profiles; } std::vector Context::getEnabledProfiles() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); return settings_constraints_and_current_profiles->enabled_profiles; } @@ -1382,7 +1374,7 @@ ResourceManagerPtr Context::getResourceManager() const ClassifierPtr Context::getWorkloadClassifier() const { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!classifier) classifier = getResourceManager()->acquire(getSettingsRef().workload); return classifier; @@ -1420,7 +1412,7 @@ Tables Context::getExternalTables() const if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); - auto lock = 
getLocalSharedLock(); + SharedLockGuard lock(mutex); Tables res; for (const auto & table : external_tables_mapping) @@ -1447,7 +1439,7 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have external tables"); - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists.", backQuoteIfNeed(table_name)); external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); @@ -1460,7 +1452,7 @@ std::shared_ptr Context::findExternalTable(const String & std::shared_ptr holder; { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); auto iter = external_tables_mapping.find(table_name); if (iter == external_tables_mapping.end()) return {}; @@ -1476,7 +1468,7 @@ std::shared_ptr Context::removeExternalTable(const String std::shared_ptr holder; { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); auto iter = external_tables_mapping.find(table_name); if (iter == external_tables_mapping.end()) return {}; @@ -1859,18 +1851,18 @@ bool Context::displaySecretsInShowAndSelect() const Settings Context::getSettings() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); return settings; } void Context::setSettings(const Settings & settings_) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); settings = settings_; need_recalculate_access = true; } -void Context::setSettingWithLock(std::string_view name, const String & value, const std::unique_lock & lock) +void Context::setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock) { if (name == "profile") { @@ -1882,7 +1874,7 @@ void Context::setSettingWithLock(std::string_view name, const String & value, co need_recalculate_access = true; } -void Context::setSettingWithLock(std::string_view name, const Field & value, const std::unique_lock & lock) +void Context::setSettingWithLock(std::string_view name, const Field & value, const std::lock_guard & lock) { if (name == "profile") { @@ -1894,7 +1886,7 @@ void Context::setSettingWithLock(std::string_view name, const Field & value, con need_recalculate_access = true; } -void Context::applySettingChangeWithLock(const SettingChange & change, const std::unique_lock & lock) +void Context::applySettingChangeWithLock(const SettingChange & change, const std::lock_guard & lock) { try { @@ -1909,7 +1901,7 @@ void Context::applySettingChangeWithLock(const SettingChange & change, const std } } -void Context::applySettingsChangesWithLock(const SettingsChanges & changes, const std::unique_lock & lock) +void Context::applySettingsChangesWithLock(const SettingsChanges & changes, const std::lock_guard& lock) { for (const SettingChange & change : changes) applySettingChangeWithLock(change, lock); @@ -1918,13 +1910,13 @@ void Context::applySettingsChangesWithLock(const SettingsChanges & changes, cons void Context::setSetting(std::string_view name, const String & value) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); setSettingWithLock(name, value, lock); } void Context::setSetting(std::string_view name, const Field & value) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); setSettingWithLock(name, value, lock); } @@ -1946,7 +1938,7 @@ void Context::applySettingChange(const SettingChange & change) void 
Context::applySettingsChanges(const SettingsChanges & changes) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); applySettingsChangesWithLock(changes, lock); } @@ -1982,43 +1974,43 @@ void Context::checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const { - auto shared_lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); checkSettingsConstraintsWithLock(profile_elements, source); } void Context::checkSettingsConstraints(const SettingChange & change, SettingSource source) const { - auto shared_lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); checkSettingsConstraintsWithLock(change, source); } void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const { - auto shared_lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); getSettingsConstraintsAndCurrentProfilesWithLock()->constraints.check(settings, changes, source); } void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - auto shared_lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); checkSettingsConstraintsWithLock(changes, source); } void Context::clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - auto shared_lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); clampToSettingsConstraintsWithLock(changes, source); } void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const { - auto shared_lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); checkMergeTreeSettingsConstraintsWithLock(merge_tree_settings, changes); } void Context::resetSettingsToDefaultValue(const std::vector & names) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); for (const String & name: names) settings.setDefaultValue(name); } @@ -2033,13 +2025,13 @@ std::shared_ptr Context::getSettingsCons std::shared_ptr Context::getSettingsConstraintsAndCurrentProfiles() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); return getSettingsConstraintsAndCurrentProfilesWithLock(); } String Context::getCurrentDatabase() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); return current_database; } @@ -2056,7 +2048,7 @@ void Context::setCurrentDatabaseNameInGlobalContext(const String & name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set current database for non global context, this method should " "be used during server initialization"); - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!current_database.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Default database name cannot be changed in global context without server restart"); @@ -2064,7 +2056,7 @@ void Context::setCurrentDatabaseNameInGlobalContext(const String & name) current_database = name; } -void Context::setCurrentDatabaseWithLock(const String & name, const std::unique_lock &) +void Context::setCurrentDatabaseWithLock(const String & name, const std::lock_guard &) { DatabaseCatalog::instance().assertDatabaseExists(name); current_database = name; @@ -2073,7 +2065,7 @@ void Context::setCurrentDatabaseWithLock(const String & name, const std::unique_ void Context::setCurrentDatabase(const String & name) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); setCurrentDatabaseWithLock(name, lock); } @@ -2253,10 +2245,10 @@ const ExternalDictionariesLoader & 
Context::getExternalDictionariesLoader() cons ExternalDictionariesLoader & Context::getExternalDictionariesLoader() { std::lock_guard lock(shared->external_dictionaries_mutex); - return getExternalDictionariesLoaderUnlocked(); + return getExternalDictionariesLoaderWithLock(lock); } -ExternalDictionariesLoader & Context::getExternalDictionariesLoaderUnlocked() +ExternalDictionariesLoader & Context::getExternalDictionariesLoaderWithLock(const std::lock_guard &) TSA_REQUIRES(shared->external_dictionaries_mutex) { if (!shared->external_dictionaries_loader) shared->external_dictionaries_loader = @@ -2272,10 +2264,11 @@ const ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDef ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedExecutableFunctionsLoader() { std::lock_guard lock(shared->external_user_defined_executable_functions_mutex); - return getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); + return getExternalUserDefinedExecutableFunctionsLoaderWithLock(lock); } -ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedExecutableFunctionsLoaderUnlocked() +ExternalUserDefinedExecutableFunctionsLoader & +Context::getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard &) TSA_REQUIRES(shared->external_user_defined_executable_functions_mutex) { if (!shared->external_user_defined_executable_functions_loader) shared->external_user_defined_executable_functions_loader = @@ -2315,7 +2308,7 @@ void Context::loadOrReloadDictionaries(const Poco::Util::AbstractConfiguration & std::lock_guard lock(shared->external_dictionaries_mutex); - auto & external_dictionaries_loader = getExternalDictionariesLoaderUnlocked(); + auto & external_dictionaries_loader = getExternalDictionariesLoaderWithLock(lock); external_dictionaries_loader.enableAlwaysLoadEverything(!dictionaries_lazy_load); if (shared->external_dictionaries_config_repository) @@ -2339,7 +2332,7 @@ void Context::loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::Abstr std::lock_guard lock(shared->external_user_defined_executable_functions_mutex); - auto & external_user_defined_executable_functions_loader = getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); + auto & external_user_defined_executable_functions_loader = getExternalUserDefinedExecutableFunctionsLoaderWithLock(lock); if (shared->user_defined_executable_functions_config_repository) { @@ -2361,7 +2354,7 @@ const IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() c shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); }); - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return *shared->user_defined_sql_objects_loader; } @@ -2371,7 +2364,7 @@ IUserDefinedSQLObjectsLoader & Context::getUserDefinedSQLObjectsLoader() shared->user_defined_sql_objects_loader = createUserDefinedSQLObjectsLoader(getGlobalContext()); }); - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return *shared->user_defined_sql_objects_loader; } @@ -2399,9 +2392,6 @@ Lemmatizers & Context::getLemmatizers() const BackupsWorker & Context::getBackupsWorker() const { callOnce(shared->backups_worker_initialized, [&] { - if (shared->backups_worker) - return; - const auto & config = getConfigRef(); const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true); const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true); @@ -2413,7 
+2403,6 @@ BackupsWorker & Context::getBackupsWorker() const shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores); }); - auto lock = getGlobalSharedLock(); return *shared->backups_worker; } @@ -2457,7 +2446,7 @@ QueryStatusPtr Context::getProcessListElementSafe() const void Context::setUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache has been already created."); @@ -2467,7 +2456,7 @@ void Context::setUncompressedCache(const String & cache_policy, size_t max_size_ void Context::updateUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet."); @@ -2478,13 +2467,13 @@ void Context::updateUncompressedCacheConfiguration(const Poco::Util::AbstractCon UncompressedCachePtr Context::getUncompressedCache() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->uncompressed_cache; } void Context::clearUncompressedCache() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->uncompressed_cache) shared->uncompressed_cache->clear(); @@ -2492,7 +2481,7 @@ void Context::clearUncompressedCache() const void Context::setMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->mark_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache has been already created."); @@ -2502,7 +2491,7 @@ void Context::setMarkCache(const String & cache_policy, size_t max_cache_size_in void Context::updateMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->mark_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); @@ -2513,13 +2502,13 @@ void Context::updateMarkCacheConfiguration(const Poco::Util::AbstractConfigurati MarkCachePtr Context::getMarkCache() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->mark_cache; } void Context::clearMarkCache() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->mark_cache) shared->mark_cache->clear(); @@ -2529,7 +2518,6 @@ ThreadPool & Context::getLoadMarksThreadpool() const { callOnce(shared->load_marks_threadpool_initialized, [&] { const auto & config = getConfigRef(); - auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50); auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000); shared->load_marks_threadpool = std::make_unique( @@ -2541,7 +2529,7 @@ ThreadPool & Context::getLoadMarksThreadpool() const void Context::setIndexUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->index_uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); @@ -2551,7 +2539,7 @@ void Context::setIndexUncompressedCache(const String & cache_policy, size_t max_ void 
Context::updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->index_uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet."); @@ -2562,13 +2550,13 @@ void Context::updateIndexUncompressedCacheConfiguration(const Poco::Util::Abstra UncompressedCachePtr Context::getIndexUncompressedCache() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->index_uncompressed_cache; } void Context::clearIndexUncompressedCache() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->index_uncompressed_cache) shared->index_uncompressed_cache->clear(); @@ -2576,7 +2564,7 @@ void Context::clearIndexUncompressedCache() const void Context::setIndexMarkCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->index_mark_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); @@ -2586,7 +2574,7 @@ void Context::setIndexMarkCache(const String & cache_policy, size_t max_cache_si void Context::updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getLocalLock(); + std::lock_guard lock(shared->mutex); if (!shared->index_mark_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache was not created yet."); @@ -2597,13 +2585,13 @@ void Context::updateIndexMarkCacheConfiguration(const Poco::Util::AbstractConfig MarkCachePtr Context::getIndexMarkCache() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->index_mark_cache; } void Context::clearIndexMarkCache() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->index_mark_cache) shared->index_mark_cache->clear(); @@ -2611,7 +2599,7 @@ void Context::clearIndexMarkCache() const void Context::setMMappedFileCache(size_t max_cache_size_in_num_entries) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->mmap_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); @@ -2621,7 +2609,7 @@ void Context::setMMappedFileCache(size_t max_cache_size_in_num_entries) void Context::updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->mmap_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache was not created yet."); @@ -2632,13 +2620,13 @@ void Context::updateMMappedFileCacheConfiguration(const Poco::Util::AbstractConf MMappedFileCachePtr Context::getMMappedFileCache() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->mmap_cache; } void Context::clearMMappedFileCache() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->mmap_cache) shared->mmap_cache->clear(); @@ -2646,7 +2634,7 @@ void Context::clearMMappedFileCache() const void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->query_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); @@ -2656,7 +2644,7 @@ 
void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->query_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache was not created yet."); @@ -2670,13 +2658,13 @@ void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfigurat QueryCachePtr Context::getQueryCache() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->query_cache; } void Context::clearQueryCache() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->query_cache) shared->query_cache->clear(); @@ -2684,7 +2672,7 @@ void Context::clearQueryCache() const void Context::clearCaches() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache was not created yet."); @@ -2826,7 +2814,7 @@ ThrottlerPtr Context::getRemoteReadThrottler() const ThrottlerPtr throttler = shared->remote_read_throttler; if (auto bandwidth = getSettingsRef().max_remote_read_network_bandwidth) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!remote_read_query_throttler) remote_read_query_throttler = std::make_shared(bandwidth, throttler); throttler = remote_read_query_throttler; @@ -2839,7 +2827,7 @@ ThrottlerPtr Context::getRemoteWriteThrottler() const ThrottlerPtr throttler = shared->remote_write_throttler; if (auto bandwidth = getSettingsRef().max_remote_write_network_bandwidth) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!remote_write_query_throttler) remote_write_query_throttler = std::make_shared(bandwidth, throttler); throttler = remote_write_query_throttler; @@ -2852,7 +2840,7 @@ ThrottlerPtr Context::getLocalReadThrottler() const ThrottlerPtr throttler = shared->local_read_throttler; if (auto bandwidth = getSettingsRef().max_local_read_bandwidth) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!local_read_query_throttler) local_read_query_throttler = std::make_shared(bandwidth, throttler); throttler = local_read_query_throttler; @@ -2865,7 +2853,7 @@ ThrottlerPtr Context::getLocalWriteThrottler() const ThrottlerPtr throttler = shared->local_write_throttler; if (auto bandwidth = getSettingsRef().max_local_write_bandwidth) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!local_write_query_throttler) local_write_query_throttler = std::make_shared(bandwidth, throttler); throttler = local_write_query_throttler; @@ -2878,7 +2866,7 @@ ThrottlerPtr Context::getBackupsThrottler() const ThrottlerPtr throttler = shared->backups_server_throttler; if (auto bandwidth = getSettingsRef().max_backup_bandwidth) { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!backups_query_throttler) backups_query_throttler = std::make_shared(bandwidth, throttler); throttler = backups_query_throttler; @@ -2893,7 +2881,7 @@ bool Context::hasDistributedDDL() const void Context::setDDLWorker(std::unique_ptr ddl_worker) { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (shared->ddl_worker) throw Exception(ErrorCodes::LOGICAL_ERROR, "DDL background thread has already been initialized"); ddl_worker->startup(); @@ -2902,7 +2890,7 @@ void Context::setDDLWorker(std::unique_ptr ddl_worker) DDLWorker & Context::getDDLWorker() const { - auto lock = 
getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->ddl_worker) { if (!hasZooKeeper()) @@ -3018,15 +3006,29 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() /// This method explicitly sets correct pointer to system log after its initialization. /// TODO get rid of this if possible - std::lock_guard lock(shared->zookeeper_mutex); - if (!shared->system_logs || !shared->system_logs->zookeeper_log) + std::shared_ptr zookeeper_log; + { + SharedLockGuard lock(shared->mutex); + if (!shared->system_logs) + return; + + zookeeper_log = shared->system_logs->zookeeper_log; + } + + if (!zookeeper_log) return; - if (shared->zookeeper) - shared->zookeeper->setZooKeeperLog(shared->system_logs->zookeeper_log); + { + std::lock_guard lock(shared->zookeeper_mutex); + if (shared->zookeeper) + shared->zookeeper->setZooKeeperLog(zookeeper_log); + } - for (auto & zk : shared->auxiliary_zookeepers) - zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log); + { + std::lock_guard lock_auxiliary_zookeepers(shared->auxiliary_zookeepers_mutex); + for (auto & zk : shared->auxiliary_zookeepers) + zk.second->setZooKeeperLog(zookeeper_log); + } } void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const @@ -3236,21 +3238,25 @@ String Context::getInterserverScheme() const void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) { + std::lock_guard lock(shared->mutex); shared->remote_host_filter.setValuesFromConfig(config); } const RemoteHostFilter & Context::getRemoteHostFilter() const { + SharedLockGuard lock(shared->mutex); return shared->remote_host_filter; } void Context::setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config) { + std::lock_guard lock(shared->mutex); shared->http_header_filter.setValuesFromConfig(config); } const HTTPHeaderFilter & Context::getHTTPHeaderFilter() const { + SharedLockGuard lock(shared->mutex); return shared->http_header_filter; } @@ -3350,7 +3356,7 @@ std::map Context::getClusters() const return clusters; } -std::shared_ptr Context::getClustersImpl(std::lock_guard & /* lock */) const +std::shared_ptr Context::getClustersImpl(std::lock_guard & /* lock */) const TSA_REQUIRES(shared->clusters_mutex) { if (!shared->clusters) { @@ -3412,7 +3418,7 @@ void Context::initializeSystemLogs() /// of any other table if it is stored on a disk with cache. callOnce(shared->system_logs_initialized, [&] { auto system_logs = std::make_unique(getGlobalContext(), getConfigRef()); - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); shared->system_logs = std::move(system_logs); }); } @@ -3423,7 +3429,7 @@ void Context::initializeTraceCollector() } /// Call after unexpected crash happen. 
-void Context::handleCrash() const +void Context::handleCrash() const TSA_NO_THREAD_SAFETY_ANALYSIS { shared->system_logs->handleCrash(); } @@ -3436,7 +3442,7 @@ bool Context::hasTraceCollector() const std::shared_ptr Context::getQueryLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3446,7 +3452,7 @@ std::shared_ptr Context::getQueryLog() const std::shared_ptr Context::getQueryThreadLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3456,7 +3462,7 @@ std::shared_ptr Context::getQueryThreadLog() const std::shared_ptr Context::getQueryViewsLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3466,7 +3472,7 @@ std::shared_ptr Context::getQueryViewsLog() const std::shared_ptr Context::getPartLog(const String & part_database) const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); /// No part log or system logs are shutting down. if (!shared->system_logs) @@ -3484,7 +3490,7 @@ std::shared_ptr Context::getPartLog(const String & part_database) const std::shared_ptr Context::getTraceLog() const { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3495,7 +3501,7 @@ std::shared_ptr Context::getTraceLog() const std::shared_ptr Context::getTextLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3506,7 +3512,7 @@ std::shared_ptr Context::getTextLog() const std::shared_ptr Context::getMetricLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3517,7 +3523,7 @@ std::shared_ptr Context::getMetricLog() const std::shared_ptr Context::getAsynchronousMetricLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3528,7 +3534,7 @@ std::shared_ptr Context::getAsynchronousMetricLog() const std::shared_ptr Context::getOpenTelemetrySpanLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3538,7 +3544,7 @@ std::shared_ptr Context::getOpenTelemetrySpanLog() const std::shared_ptr Context::getSessionLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3549,7 +3555,7 @@ std::shared_ptr Context::getSessionLog() const std::shared_ptr Context::getZooKeeperLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3560,7 +3566,7 @@ std::shared_ptr Context::getZooKeeperLog() const std::shared_ptr Context::getTransactionsInfoLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3571,7 +3577,7 @@ std::shared_ptr Context::getTransactionsInfoLog() const std::shared_ptr Context::getProcessorsProfileLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3581,7 +3587,7 @@ std::shared_ptr Context::getProcessorsProfileLog() const std::shared_ptr Context::getFilesystemCacheLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ 
-3590,7 +3596,7 @@ std::shared_ptr Context::getFilesystemCacheLog() const std::shared_ptr Context::getS3QueueLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3599,7 +3605,7 @@ std::shared_ptr Context::getS3QueueLog() const std::shared_ptr Context::getFilesystemReadPrefetchesLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3608,7 +3614,7 @@ std::shared_ptr Context::getFilesystemReadPrefetche std::shared_ptr Context::getAsynchronousInsertLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3618,7 +3624,7 @@ std::shared_ptr Context::getAsynchronousInsertLog() const std::shared_ptr Context::getBackupLog() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3628,7 +3634,7 @@ std::shared_ptr Context::getBackupLog() const std::vector Context::getSystemLogs() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; @@ -3638,12 +3644,12 @@ std::vector Context::getSystemLogs() const CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->compression_codec_selector) { constexpr auto config_name = "compression"; - const auto & config = getConfigRefWithLock(lock); + const auto & config = shared->getConfigRefWithLock(lock); if (config.has(config_name)) shared->compression_codec_selector = std::make_unique(config, "compression"); @@ -3731,69 +3737,71 @@ StoragePoliciesMap Context::getPoliciesMap() const return getStoragePolicySelector(lock)->getPoliciesMap(); } -DiskSelectorPtr Context::getDiskSelector(std::lock_guard & /* lock */) const +DiskSelectorPtr Context::getDiskSelector(std::lock_guard & /* lock */) const TSA_REQUIRES(shared->storage_policies_mutex) { if (!shared->merge_tree_disk_selector) { constexpr auto config_name = "storage_configuration.disks"; const auto & config = getConfigRef(); - auto disk_selector = std::make_shared(); disk_selector->initialize(config, config_name, shared_from_this()); shared->merge_tree_disk_selector = disk_selector; } + return shared->merge_tree_disk_selector; } -StoragePolicySelectorPtr Context::getStoragePolicySelector(std::lock_guard & lock) const +StoragePolicySelectorPtr Context::getStoragePolicySelector(std::lock_guard & lock) const TSA_REQUIRES(shared->storage_policies_mutex) { if (!shared->merge_tree_storage_policy_selector) { constexpr auto config_name = "storage_configuration.policies"; const auto & config = getConfigRef(); - shared->merge_tree_storage_policy_selector = std::make_shared(config, config_name, getDiskSelector(lock)); } + return shared->merge_tree_storage_policy_selector; } void Context::updateStorageConfiguration(const Poco::Util::AbstractConfiguration & config) { - std::lock_guard lock(shared->storage_policies_mutex); - - if (shared->merge_tree_disk_selector) - shared->merge_tree_disk_selector - = shared->merge_tree_disk_selector->updateFromConfig(config, "storage_configuration.disks", shared_from_this()); - - if (shared->merge_tree_storage_policy_selector) { - try + std::lock_guard lock(shared->storage_policies_mutex); + if (shared->merge_tree_disk_selector) + shared->merge_tree_disk_selector + = 
shared->merge_tree_disk_selector->updateFromConfig(config, "storage_configuration.disks", shared_from_this()); + + if (shared->merge_tree_storage_policy_selector) { - shared->merge_tree_storage_policy_selector = shared->merge_tree_storage_policy_selector->updateFromConfig( - config, "storage_configuration.policies", shared->merge_tree_disk_selector); - } - catch (Exception & e) - { - LOG_ERROR( - shared->log, "An error has occurred while reloading storage policies, storage policies were not applied: {}", e.message()); + try + { + shared->merge_tree_storage_policy_selector = shared->merge_tree_storage_policy_selector->updateFromConfig( + config, "storage_configuration.policies", shared->merge_tree_disk_selector); + } + catch (Exception & e) + { + LOG_ERROR( + shared->log, "An error has occurred while reloading storage policies, storage policies were not applied: {}", e.message()); + } } } - if (shared->storage_s3_settings) { - shared->storage_s3_settings->loadFromConfig("s3", config, getSettingsRef()); + std::lock_guard lock(shared->mutex); + if (shared->storage_s3_settings) + shared->storage_s3_settings->loadFromConfig("s3", config, getSettingsRef()); } } const MergeTreeSettings & Context::getMergeTreeSettings() const { - auto lock = getLocalLock(); + std::lock_guard lock(shared->mutex); if (!shared->merge_tree_settings) { - const auto & config = getConfigRefWithLock(lock); + const auto & config = shared->getConfigRefWithLock(lock); MergeTreeSettings mt_settings; mt_settings.loadFromConfig("merge_tree", config); shared->merge_tree_settings.emplace(mt_settings); @@ -3804,11 +3812,11 @@ const MergeTreeSettings & Context::getMergeTreeSettings() const const MergeTreeSettings & Context::getReplicatedMergeTreeSettings() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->replicated_merge_tree_settings) { - const auto & config = getConfigRefWithLock(lock); + const auto & config = shared->getConfigRefWithLock(lock); MergeTreeSettings mt_settings; mt_settings.loadFromConfig("merge_tree", config); mt_settings.loadFromConfig("replicated_merge_tree", config); @@ -3820,11 +3828,11 @@ const MergeTreeSettings & Context::getReplicatedMergeTreeSettings() const const StorageS3Settings & Context::getStorageS3Settings() const { - auto lock = getGlobalLock(); + std::lock_guard lock(shared->mutex); if (!shared->storage_s3_settings) { - const auto & config = getConfigRefWithLock(lock); + const auto & config = shared->getConfigRefWithLock(lock); shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); } @@ -3927,7 +3935,7 @@ OutputFormatPtr Context::getOutputFormatParallelIfPossible(const String & name, double Context::getUptimeSeconds() const { - auto lock = getGlobalSharedLock(); + SharedLockGuard lock(shared->mutex); return shared->uptime_watch.elapsedSeconds(); } @@ -3978,7 +3986,7 @@ void Context::stopServers(const ServerType & server_type) const } -void Context::shutdown() +void Context::shutdown() TSA_NO_THREAD_SAFETY_ANALYSIS { // Disk selector might not be initialized if there was some error during // its initialization. Don't try to initialize it again on shutdown. 
@@ -4089,7 +4097,7 @@ void Context::addQueryParameters(const NameToNameMap & parameters) void Context::addBridgeCommand(std::unique_ptr cmd) const { - auto lock = getLocalLock(); + std::lock_guard lock(shared->mutex); shared->bridge_commands.emplace_back(std::move(cmd)); } @@ -4314,7 +4322,7 @@ StorageID Context::resolveStorageID(StorageID storage_id, StorageNamespace where StorageID resolved = StorageID::createEmpty(); std::optional exc; { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); resolved = resolveStorageIDImpl(std::move(storage_id), where, &exc); } if (exc) @@ -4331,7 +4339,7 @@ StorageID Context::tryResolveStorageID(StorageID storage_id, StorageNamespace wh StorageID resolved = StorageID::createEmpty(); { - auto lock = getLocalSharedLock(); + SharedLockGuard lock(mutex); resolved = resolveStorageIDImpl(std::move(storage_id), where, nullptr); } if (resolved && !resolved.hasUUID() && resolved.database_name != DatabaseCatalog::TEMPORARY_DATABASE) @@ -4481,7 +4489,7 @@ MergeTreeTransactionPtr Context::getCurrentTransaction() const bool Context::isServerCompletelyStarted() const { - auto lock = getLocalLock(); + SharedLockGuard lock(shared->mutex); assert(getApplicationType() == ApplicationType::SERVER); return shared->is_server_completely_started; } @@ -4489,15 +4497,20 @@ bool Context::isServerCompletelyStarted() const void Context::setServerCompletelyStarted() { { - std::lock_guard lock(shared->zookeeper_mutex); - if (shared->zookeeper) - shared->zookeeper->setServerCompletelyStarted(); + { + std::lock_guard lock(shared->zookeeper_mutex); + if (shared->zookeeper) + shared->zookeeper->setServerCompletelyStarted(); + } - for (auto & zk : shared->auxiliary_zookeepers) - zk.second->setServerCompletelyStarted(); + { + std::lock_guard lock(shared->auxiliary_zookeepers_mutex); + for (auto & zk : shared->auxiliary_zookeepers) + zk.second->setServerCompletelyStarted(); + } } - auto lock = getLocalLock(); + std::lock_guard lock(shared->mutex); assert(global_context.lock().get() == this); assert(!shared->is_server_completely_started); assert(getApplicationType() == ApplicationType::SERVER); @@ -4506,7 +4519,8 @@ void Context::setServerCompletelyStarted() PartUUIDsPtr Context::getPartUUIDs() const { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); + if (!part_uuids) /// For context itself, only this initialization is not const. /// We could have done in constructor. 
@@ -4572,7 +4586,7 @@ UUID Context::getParallelReplicasGroupUUID() const PartUUIDsPtr Context::getIgnoredPartUUIDs() const { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!ignored_part_uuids) const_cast(ignored_part_uuids) = std::make_shared(); @@ -4596,7 +4610,8 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrbackground_executors_mutex); + if (shared->are_background_executors_initialized) return; @@ -4657,27 +4672,31 @@ void Context::initializeBackgroundExecutorsIfNeeded() bool Context::areBackgroundExecutorsInitialized() { - auto lock = getLocalLock(); + SharedLockGuard lock(shared->background_executors_mutex); return shared->are_background_executors_initialized; } MergeMutateBackgroundExecutorPtr Context::getMergeMutateExecutor() const { + SharedLockGuard lock(shared->background_executors_mutex); return shared->merge_mutate_executor; } OrdinaryBackgroundExecutorPtr Context::getMovesExecutor() const { + SharedLockGuard lock(shared->background_executors_mutex); return shared->moves_executor; } OrdinaryBackgroundExecutorPtr Context::getFetchesExecutor() const { + SharedLockGuard lock(shared->background_executors_mutex); return shared->fetch_executor; } OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const { + SharedLockGuard lock(shared->background_executors_mutex); return shared->common_executor; } @@ -4798,7 +4817,7 @@ WriteSettings Context::getWriteSettings() const std::shared_ptr Context::getAsyncReadCounters() const { - auto lock = getLocalLock(); + std::lock_guard lock(mutex); if (!async_read_counters) async_read_counters = std::make_shared(); return async_read_counters; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index fac56f3c143..ae04151e47e 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -231,6 +232,17 @@ private: std::unique_ptr shared; }; +class ContextSharedMutex : public SharedMutexHelper +{ +private: + using Base = SharedMutexHelper; + friend class SharedMutexHelper; + + void lockImpl(); + + void lockSharedImpl(); +}; + class ContextData { protected: @@ -491,7 +503,7 @@ class Context: public ContextData, public std::enable_shared_from_this { private: /// ContextData mutex - mutable SharedMutex mutex; + mutable ContextSharedMutex mutex; Context(); Context(const Context &); @@ -778,7 +790,6 @@ public: const ExternalDictionariesLoader & getExternalDictionariesLoader() const; ExternalDictionariesLoader & getExternalDictionariesLoader(); - ExternalDictionariesLoader & getExternalDictionariesLoaderUnlocked(); const EmbeddedDictionaries & getEmbeddedDictionaries() const; EmbeddedDictionaries & getEmbeddedDictionaries(); void tryCreateEmbeddedDictionaries(const Poco::Util::AbstractConfiguration & config) const; @@ -786,7 +797,6 @@ public: const ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader() const; ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoader(); - ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderUnlocked(); const IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader() const; IUserDefinedSQLObjectsLoader & getUserDefinedSQLObjectsLoader(); void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config); @@ -1220,37 +1230,27 @@ public: const ServerSettings & getServerSettings() const; private: - std::unique_lock 
getGlobalLock() const; - - std::shared_lock getGlobalSharedLock() const; - - std::unique_lock getLocalLock() const; - - std::shared_lock getLocalSharedLock() const; - - const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::unique_lock & lock) const; - std::shared_ptr getSettingsConstraintsAndCurrentProfilesWithLock() const; - void setCurrentProfileWithLock(const String & profile_name, bool check_constraints, const std::unique_lock & lock); + void setCurrentProfileWithLock(const String & profile_name, bool check_constraints, const std::lock_guard & lock); - void setCurrentProfileWithLock(const UUID & profile_id, bool check_constraints, const std::unique_lock & lock); + void setCurrentProfileWithLock(const UUID & profile_id, bool check_constraints, const std::lock_guard & lock); - void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::unique_lock & lock); + void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard & lock); - void setCurrentRolesWithLock(const std::vector & current_roles_, const std::unique_lock & lock); + void setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard & lock); - void setSettingWithLock(std::string_view name, const String & value, const std::unique_lock & lock); + void setSettingWithLock(std::string_view name, const String & value, const std::lock_guard & lock); - void setSettingWithLock(std::string_view name, const Field & value, const std::unique_lock & lock); + void setSettingWithLock(std::string_view name, const Field & value, const std::lock_guard & lock); - void applySettingChangeWithLock(const SettingChange & change, const std::unique_lock & lock); + void applySettingChangeWithLock(const SettingChange & change, const std::lock_guard & lock); - void applySettingsChangesWithLock(const SettingsChanges & changes, const std::unique_lock & lock); + void applySettingsChangesWithLock(const SettingsChanges & changes, const std::lock_guard & lock); - void setUserIDWithLock(const UUID & user_id_, const std::unique_lock & lock); + void setUserIDWithLock(const UUID & user_id_, const std::lock_guard & lock); - void setCurrentDatabaseWithLock(const String & name, const std::unique_lock & lock); + void setCurrentDatabaseWithLock(const String & name, const std::lock_guard & lock); void checkSettingsConstraintsWithLock(const SettingsProfileElements & profile_elements, SettingSource source) const; @@ -1264,6 +1264,10 @@ private: void checkMergeTreeSettingsConstraintsWithLock(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const; + ExternalDictionariesLoader & getExternalDictionariesLoaderWithLock(const std::lock_guard & lock); + + ExternalUserDefinedExecutableFunctionsLoader & getExternalUserDefinedExecutableFunctionsLoaderWithLock(const std::lock_guard & lock); + void initGlobal(); void setUserID(const UUID & user_id_); diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 333aed84873..de5e5c2bf2b 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -3,77 +3,205 @@ #include #include #include +#include #include #include #include #include #include +#include #include +#include + +#include +#include + namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { -NamesAndTypes getBlockStructure() +Block getSingleValueBlock(UInt8 value) { - return { 
+ return Block{{ColumnUInt8::create(1, value), std::make_shared(), "result"}}; +} + +Block getHeaderForCheckResult() +{ + auto names_and_types = NamesAndTypes{ {"part_path", std::make_shared()}, {"is_passed", std::make_shared()}, {"message", std::make_shared()}, }; + + return Block({ + {names_and_types[0].type->createColumn(), names_and_types[0].type, names_and_types[0].name}, + {names_and_types[1].type->createColumn(), names_and_types[1].type, names_and_types[1].name}, + {names_and_types[2].type->createColumn(), names_and_types[2].type, names_and_types[2].name}, + }); } +Chunk getChunkFromCheckResult(const CheckResult & check_result) +{ + MutableColumns columns = getHeaderForCheckResult().cloneEmptyColumns(); + columns[0]->insert(check_result.fs_path); + columns[1]->insert(static_cast(check_result.success)); + columns[2]->insert(check_result.failure_message); + return Chunk(std::move(columns), 1); +} + +class TableCheckWorkerProcessor : public ISource +{ + +public: + TableCheckWorkerProcessor(IStorage::DataValidationTasksPtr check_data_tasks_, StoragePtr table_) + : ISource(getHeaderForCheckResult()) + , table(table_) + , check_data_tasks(check_data_tasks_) + { + } + + String getName() const override { return "TableCheckWorkerProcessor"; } + +protected: + + std::optional tryGenerate() override + { + auto check_result = table->checkDataNext(check_data_tasks); + if (!check_result) + return {}; + + /// We can omit the manual `progress` call; ISource may count it automatically from the returned chunk. + /// However, we want to report only rows in progress, since bytes don't make sense here. + progress(1, 0); + + if (!check_result->success) + { + LOG_WARNING(&Poco::Logger::get("InterpreterCheckQuery"), + "Check query for table {} failed, path {}, reason: {}", + table->getStorageID().getNameForLogs(), + check_result->fs_path, + check_result->failure_message); + } + + return getChunkFromCheckResult(*check_result); + } + +private: + StoragePtr table; + IStorage::DataValidationTasksPtr check_data_tasks; +}; + +class TableCheckResultEmitter : public IAccumulatingTransform +{ +public: + TableCheckResultEmitter() : IAccumulatingTransform(getHeaderForCheckResult(), getSingleValueBlock(1).cloneEmpty()) {} + + String getName() const override { return "TableCheckResultEmitter"; } + + void consume(Chunk chunk) override + { + if (result_value == 0) + return; + + auto columns = chunk.getColumns(); + if (columns.size() != 3) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of columns: {}", columns.size()); + + const auto * col = checkAndGetColumn(columns[1].get()); + for (size_t i = 0; i < col->size(); ++i) + { + if (col->getElement(i) == 0) + { + result_value = 0; + return; + } + } + } + + Chunk generate() override + { + if (is_value_emitted.exchange(true)) + return {}; + auto block = getSingleValueBlock(result_value); + return Chunk(block.getColumns(), block.rows()); + } + +private: + std::atomic result_value{1}; + std::atomic_bool is_value_emitted{false}; +}; + +} - -InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, ContextPtr context_) : WithContext(context_), query_ptr(query_ptr_) +InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, ContextPtr context_) + : WithContext(context_) + , query_ptr(query_ptr_) { } - BlockIO InterpreterCheckQuery::execute() { const auto & check = query_ptr->as(); - auto table_id = getContext()->resolveStorageID(check, Context::ResolveOrdinary); + const auto & context = getContext(); + auto table_id = context->resolveStorageID(check, 
Context::ResolveOrdinary); - getContext()->checkAccess(AccessType::SHOW_TABLES, table_id); - StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext()); - auto check_results = table->checkData(query_ptr, getContext()); + context->checkAccess(AccessType::SHOW_TABLES, table_id); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); - Block block; - if (getContext()->getSettingsRef().check_query_single_value_result) - { - bool result = std::all_of(check_results.begin(), check_results.end(), [] (const CheckResult & res) { return res.success; }); - auto column = ColumnUInt8::create(); - column->insertValue(static_cast(result)); - block = Block{{std::move(column), std::make_shared(), "result"}}; - } - else - { - auto block_structure = getBlockStructure(); - auto path_column = block_structure[0].type->createColumn(); - auto is_passed_column = block_structure[1].type->createColumn(); - auto message_column = block_structure[2].type->createColumn(); + auto check_data_tasks = table->getCheckTaskList(query_ptr, context); - for (const auto & check_result : check_results) - { - path_column->insert(check_result.fs_path); - is_passed_column->insert(static_cast(check_result.success)); - message_column->insert(check_result.failure_message); - } - - block = Block({ - {std::move(path_column), block_structure[0].type, block_structure[0].name}, - {std::move(is_passed_column), block_structure[1].type, block_structure[1].name}, - {std::move(message_column), block_structure[2].type, block_structure[2].name}}); - } + const auto & settings = context->getSettingsRef(); BlockIO res; - res.pipeline = QueryPipeline(std::make_shared(std::move(block))); + { + auto processors = std::make_shared(); + std::vector worker_ports; + size_t num_streams = std::max(settings.max_threads, 1); + for (size_t i = 0; i < num_streams; ++i) + { + auto worker_processor = std::make_shared(check_data_tasks, table); + if (i == 0) + worker_processor->addTotalRowsApprox(check_data_tasks->size()); + worker_ports.emplace_back(&worker_processor->getPort()); + processors->emplace_back(worker_processor); + } + + OutputPort * resize_outport; + { + auto resize_processor = std::make_shared(getHeaderForCheckResult(), worker_ports.size(), 1); + + auto & resize_inputs = resize_processor->getInputs(); + auto resize_inport_it = resize_inputs.begin(); + for (size_t i = 0; i < worker_ports.size(); ++i, ++resize_inport_it) + connect(*worker_ports[i], *resize_inport_it); + processors->emplace_back(resize_processor); + + assert(resize_processor->getOutputs().size() == 1); + resize_outport = &resize_processor->getOutputs().front(); + } + + if (settings.check_query_single_value_result) + { + auto emitter_processor = std::make_shared(); + auto * input_port = &emitter_processor->getInputPort(); + processors->emplace_back(emitter_processor); + + connect(*resize_outport, *input_port); + } + + res.pipeline = QueryPipeline(Pipe(std::move(processors))); + res.pipeline.setNumThreads(num_streams); + } return res; } diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index 875c133bb76..2ef108b5d17 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -27,6 +27,7 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "path"}, ColumnWithTypeAndName{std::make_shared>(), "background_download_threads"}, ColumnWithTypeAndName{std::make_shared>(), "enable_bypass_cache_with_threshold"}, 
+ ColumnWithTypeAndName{std::make_shared>(), "load_metadata_threads"}, }; return Block(columns); } @@ -55,6 +56,7 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(cache->getBasePath()); res_columns[i++]->insert(settings.background_download_threads); res_columns[i++]->insert(settings.enable_bypass_cache_with_threshold); + res_columns[i++]->insert(settings.load_metadata_threads); BlockIO res; size_t num_rows = res_columns[0]->size(); diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index de3a3d68d39..131361acd91 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,7 @@ #include #include #include +#include #include #include #include @@ -201,6 +203,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 03b70dd8764..3194e753d0d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2428,17 +2428,22 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// General limit for the number of threads. size_t max_threads_execute_query = settings.max_threads; - /** With distributed query processing, almost no computations are done in the threads, - * but wait and receive data from remote servers. - * If we have 20 remote servers, and max_threads = 8, then it would not be very good - * connect and ask only 8 servers at a time. - * To simultaneously query more remote servers, - * instead of max_threads, max_distributed_connections is used. + /** + * To simultaneously query more remote servers when async_socket_for_remote is off, + * max_distributed_connections is used instead of max_threads: + * since threads there mostly spend time waiting for data from remote servers, + * we can increase the degree of parallelism to avoid sequential querying of remote servers. + * + * DANGER: this can lead to an insane number of working threads if there are a lot of streams and prefer_localhost_replica is used. + * + * That is not needed when async_socket_for_remote is on, because in that case + * threads are not blocked waiting for data from remote servers. + * */ - bool is_remote = false; - if (storage && storage->isRemote()) + bool is_sync_remote = false; + if (storage && storage->isRemote() && !settings.async_socket_for_remote) { - is_remote = true; + is_sync_remote = true; max_threads_execute_query = max_streams = settings.max_distributed_connections; } @@ -2494,7 +2499,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc max_streams = 1; /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads. 
- if (max_streams > 1 && !is_remote) + if (max_streams > 1 && !is_sync_remote) max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); auto & prewhere_info = analysis_result.prewhere_info; @@ -2592,7 +2597,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// The inner local query (that is done in the same process, without /// network interaction), it will setMaxThreads earlier and distributed /// query will not update it. - if (!query_plan.getMaxThreads() || is_remote) + if (!query_plan.getMaxThreads() || is_sync_remote) query_plan.setMaxThreads(max_threads_execute_query); query_plan.setConcurrencyControl(settings.use_concurrency_control); diff --git a/src/Interpreters/InterpreterShowColumnsQuery.cpp b/src/Interpreters/InterpreterShowColumnsQuery.cpp index d14a36ef7e1..c8fb64e37f2 100644 --- a/src/Interpreters/InterpreterShowColumnsQuery.cpp +++ b/src/Interpreters/InterpreterShowColumnsQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -31,12 +32,8 @@ String InterpreterShowColumnsQuery::getRewrittenQuery() WriteBufferFromOwnString buf_database; String resolved_database = getContext()->resolveDatabase(query.database); - writeEscapedString(resolved_database, buf_database); - String database = buf_database.str(); - - WriteBufferFromOwnString buf_table; - writeEscapedString(query.table, buf_table); - String table = buf_table.str(); + String database = escapeString(resolved_database); + String table = escapeString(query.table); String rewritten_query; if (use_mysql_types) diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 9b36f1496e7..63cda814683 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -23,16 +24,9 @@ InterpreterShowIndexesQuery::InterpreterShowIndexesQuery(const ASTPtr & query_pt String InterpreterShowIndexesQuery::getRewrittenQuery() { const auto & query = query_ptr->as(); - - WriteBufferFromOwnString buf_table; - writeEscapedString(query.table, buf_table); - String table = buf_table.str(); - - WriteBufferFromOwnString buf_database; + String table = escapeString(query.table); String resolved_database = getContext()->resolveDatabase(query.database); - writeEscapedString(resolved_database, buf_database); - String database = buf_database.str(); - + String database = escapeString(resolved_database); String where_expression = query.where_expression ? 
fmt::format("WHERE ({})", query.where_expression) : ""; String rewritten_query = fmt::format(R"( diff --git a/src/Interpreters/InterpreterShowSettingQuery.cpp b/src/Interpreters/InterpreterShowSettingQuery.cpp new file mode 100644 index 00000000000..7567e77d28f --- /dev/null +++ b/src/Interpreters/InterpreterShowSettingQuery.cpp @@ -0,0 +1,34 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + + +InterpreterShowSettingQuery::InterpreterShowSettingQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_) + , query_ptr(query_ptr_) +{ +} + + +String InterpreterShowSettingQuery::getRewrittenQuery() +{ + const auto & query = query_ptr->as(); + return fmt::format(R"(SELECT value FROM system.settings WHERE name = '{0}')", escapeString(query.getSettingName())); +} + + +BlockIO InterpreterShowSettingQuery::execute() +{ + return executeQuery(getRewrittenQuery(), getContext(), true).second; +} + + +} + diff --git a/src/Interpreters/InterpreterShowSettingQuery.h b/src/Interpreters/InterpreterShowSettingQuery.h new file mode 100644 index 00000000000..6fa8869a284 --- /dev/null +++ b/src/Interpreters/InterpreterShowSettingQuery.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class Context; + + +/// Returns setting value for specified setting name +class InterpreterShowSettingQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterShowSettingQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_); + + BlockIO execute() override; + + /// Ignore quota and limits here because execute() produces a SELECT query which checks quotas/limits by itself. + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + ASTPtr query_ptr; + + String getRewrittenQuery(); +}; + + +} + diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 97bd8e7a8b7..00fda19b095 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -48,12 +48,12 @@ String InterpreterShowTablesQuery::getRewrittenQuery() << DB::quote << query.like; } - if (query.limit_length) - rewritten_query << " LIMIT " << query.limit_length; - /// (*) rewritten_query << " ORDER BY name"; + if (query.limit_length) + rewritten_query << " LIMIT " << query.limit_length; + return rewritten_query.str(); } @@ -116,6 +116,32 @@ String InterpreterShowTablesQuery::getRewrittenQuery() return rewritten_query.str(); } + /// SHOW MERGES + if (query.merges) + { + WriteBufferFromOwnString rewritten_query; + rewritten_query << "SELECT table, database, round((elapsed * (1 / progress)) - elapsed, 2) AS estimate_complete, elapsed, " + "round(progress, 2) AS progress, is_mutation, formatReadableSize(total_size_bytes_compressed) AS size, " + "formatReadableSize(memory_usage) AS mem FROM system.merges"; + + if (!query.like.empty()) + { + rewritten_query + << " WHERE table " + << (query.not_like ? "NOT " : "") + << (query.case_insensitive_like ? 
"ILIKE " : "LIKE ") + << DB::quote << query.like; + } + + /// (*) + rewritten_query << " ORDER BY elapsed desc"; + + if (query.limit_length) + rewritten_query << " LIMIT " << query.limit_length; + + return rewritten_query.str(); + } + if (query.temporary && !query.getFrom().empty()) throw Exception(ErrorCodes::SYNTAX_ERROR, "The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`"); @@ -158,7 +184,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() else if (query.where_expression) rewritten_query << " AND (" << query.where_expression << ")"; - /// (*) + /// (*) rewritten_query << " ORDER BY name "; if (query.limit_length) diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index cbdd656fb8c..f48b987561e 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -29,7 +29,13 @@ void NormalizeSelectWithUnionQueryMatcher::getSelectsFromUnionListNode(ASTPtr as void NormalizeSelectWithUnionQueryMatcher::visit(ASTPtr & ast, Data & data) { if (auto * select_union = ast->as()) + { + /// The rewrite of ASTSelectWithUnionQuery may strip the format info, so + /// we need to keep and restore it. + auto format = select_union->format; visit(*select_union, data); + select_union->format = format; + } } void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Data & data) diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp index ea212991bd0..40de887da27 100644 --- a/src/Interpreters/PredicateRewriteVisitor.cpp +++ b/src/Interpreters/PredicateRewriteVisitor.cpp @@ -166,7 +166,7 @@ static void getConjunctionHashesFrom(const ASTPtr & ast, std::set & bool PredicateRewriteVisitorData::rewriteSubquery(ASTSelectQuery & subquery, const Names & inner_columns) { if ((!optimize_final && subquery.final()) - || (!optimize_with && subquery.with()) + || (subquery.with() && (!optimize_with || hasNonRewritableFunction(subquery.with(), getContext()))) || subquery.withFill() || subquery.limitBy() || subquery.limitLength() || subquery.limitByLength() || subquery.limitByOffset() || hasNonRewritableFunction(subquery.select(), getContext()) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index b8b61c7c11f..5df226cc296 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -324,11 +324,11 @@ ColumnPtr Set::execute(const ColumnsWithTypeAndName & columns, bool negative) co if (!transform_null_in && data_types[i]->canBeInsideNullable()) { - result = castColumnAccurateOrNull(column_to_cast, data_types[i]); + result = castColumnAccurateOrNull(column_to_cast, data_types[i], cast_cache.get()); } else { - result = castColumnAccurate(column_to_cast, data_types[i]); + result = castColumnAccurate(column_to_cast, data_types[i], cast_cache.get()); } materialized_columns.emplace_back() = result; diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 9ea46e117ef..11e0ddbfae2 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -9,6 +9,7 @@ #include #include +#include namespace DB @@ -33,9 +34,9 @@ public: /// This is needed for subsequent use for index. 
Set(const SizeLimits & limits_, size_t max_elements_to_fill_, bool transform_null_in_) : log(&Poco::Logger::get("Set")), - limits(limits_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_) - { - } + limits(limits_), max_elements_to_fill(max_elements_to_fill_), transform_null_in(transform_null_in_), + cast_cache(std::make_unique()) + {} /** Set can be created either from AST or from a stream of data (subquery result). */ @@ -142,6 +143,10 @@ private: */ mutable SharedMutex rwlock; + /// A cache for cast functions (if any) to avoid rebuilding cast functions + /// for every call to `execute` + mutable std::unique_ptr cast_cache; + template void insertFromBlockImpl( Method & method, @@ -193,7 +198,7 @@ using FunctionPtr = std::shared_ptr; */ struct FieldValue { - FieldValue(MutableColumnPtr && column_) : column(std::move(column_)) {} + explicit FieldValue(MutableColumnPtr && column_) : column(std::move(column_)) {} void update(const Field & x); bool isNormal() const { return !value.isPositiveInfinity() && !value.isNegativeInfinity(); } @@ -225,6 +230,8 @@ public: size_t size() const { return ordered_set.at(0)->size(); } + const Columns & getOrderedSet() const { return ordered_set; } + bool hasMonotonicFunctionsChain() const; BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types, bool single_point = false) const; diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 932afec3bad..b27a8a48651 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -139,7 +139,7 @@ private: /* Saving thread data */ const StorageID table_id; const String storage_def; - String create_query; + const String create_query; String old_create_query; bool is_prepared = false; diff --git a/src/Interpreters/castColumn.cpp b/src/Interpreters/castColumn.cpp index dc9882b84b0..44e669a21ab 100644 --- a/src/Interpreters/castColumn.cpp +++ b/src/Interpreters/castColumn.cpp @@ -7,24 +7,29 @@ namespace DB { template -static ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type) +static ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache = nullptr) { if (arg.type->equals(*type) && cast_type != CastType::accurateOrNull) return arg.column; + const auto from_name = arg.type->getName(); + const auto to_name = type->getName(); ColumnsWithTypeAndName arguments { arg, { - DataTypeString().createColumnConst(arg.column->size(), type->getName()), + DataTypeString().createColumnConst(arg.column->size(), to_name), std::make_shared(), "" } }; + auto get_cast_func = [&arguments] + { + FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); + return func_builder_cast->build(arguments); + }; - FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); - - auto func_cast = func_builder_cast->build(arguments); + FunctionBasePtr func_cast = cache ? 
cache->getOrSet(cast_type, from_name, to_name, std::move(get_cast_func)) : get_cast_func(); if constexpr (cast_type == CastType::accurateOrNull) { @@ -36,19 +41,19 @@ static ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr } } -ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type) +ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache) { - return castColumn(arg, type); + return castColumn(arg, type, cache); } -ColumnPtr castColumnAccurate(const ColumnWithTypeAndName & arg, const DataTypePtr & type) +ColumnPtr castColumnAccurate(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache) { - return castColumn(arg, type); + return castColumn(arg, type, cache); } -ColumnPtr castColumnAccurateOrNull(const ColumnWithTypeAndName & arg, const DataTypePtr & type) +ColumnPtr castColumnAccurateOrNull(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache) { - return castColumn(arg, type); + return castColumn(arg, type, cache); } } diff --git a/src/Interpreters/castColumn.h b/src/Interpreters/castColumn.h index fcbea0f4646..8d2c05025bb 100644 --- a/src/Interpreters/castColumn.h +++ b/src/Interpreters/castColumn.h @@ -1,12 +1,34 @@ #pragma once +#include #include +#include namespace DB { -ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type); -ColumnPtr castColumnAccurate(const ColumnWithTypeAndName & arg, const DataTypePtr & type); -ColumnPtr castColumnAccurateOrNull(const ColumnWithTypeAndName & arg, const DataTypePtr & type); +struct InternalCastFunctionCache +{ +private: + /// Maps -> cast functions + /// Doesn't own key, never refer to key after inserted + std::map, FunctionBasePtr> impl; + mutable std::mutex mutex; +public: + template + FunctionBasePtr getOrSet(CastType cast_type, const String & from, const String & to, Getter && getter) + { + std::lock_guard lock{mutex}; + auto key = std::forward_as_tuple(cast_type, from, to); + auto it = impl.find(key); + if (it == impl.end()) + it = impl.emplace(key, getter()).first; + return it->second; + } +}; + +ColumnPtr castColumn(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache = nullptr); +ColumnPtr castColumnAccurate(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache = nullptr); +ColumnPtr castColumnAccurateOrNull(const ColumnWithTypeAndName & arg, const DataTypePtr & type, InternalCastFunctionCache * cache = nullptr); } diff --git a/src/Parsers/ASTCheckQuery.h b/src/Parsers/ASTCheckQuery.h index f29a0bd5406..eb72a99c757 100644 --- a/src/Parsers/ASTCheckQuery.h +++ b/src/Parsers/ASTCheckQuery.h @@ -10,6 +10,7 @@ namespace DB struct ASTCheckQuery : public ASTQueryWithTableAndOutput { ASTPtr partition; + String part_name; /** Get the text that identifies this element. 
*/ String getID(char delim) const override { return "CheckQuery" + (delim + getDatabase()) + delim + getTable(); } diff --git a/src/Parsers/ASTShowSettingQuery.cpp b/src/Parsers/ASTShowSettingQuery.cpp new file mode 100644 index 00000000000..267d462475d --- /dev/null +++ b/src/Parsers/ASTShowSettingQuery.cpp @@ -0,0 +1,25 @@ +#include + +#include +#include +#include + +namespace DB +{ + +ASTPtr ASTShowSettingQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + cloneOutputOptions(*res); + res->setting_name = setting_name; + return res; +} + +void ASTShowSettingQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW SETTING " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(setting_name); +} + +} diff --git a/src/Parsers/ASTShowSettingQuery.h b/src/Parsers/ASTShowSettingQuery.h new file mode 100644 index 00000000000..2b4395f307d --- /dev/null +++ b/src/Parsers/ASTShowSettingQuery.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/// Query SHOW SETTING setting_name +class ASTShowSettingQuery : public ASTQueryWithOutput +{ +public: + explicit ASTShowSettingQuery(String setting_name_) + : setting_name(setting_name_) + {} + + const String & getSettingName() const { return setting_name; } + + String getID(char) const override { return "ShowSetting"; } + ASTPtr clone() const override; + QueryKind getQueryKind() const override { return QueryKind::Show; } + +protected: + void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + +private: + String setting_name; +}; + +} + diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 03ae96fa288..5470bde10c8 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -78,6 +78,12 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format (settings.hilite ? hilite_none : ""); formatLike(settings); } + else if (merges) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW MERGES" << (settings.hilite ? hilite_none : ""); + formatLike(settings); + formatLimit(settings, state, frame); + } else { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? 
"TEMPORARY " : "") << diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index 2fd4c3dac96..f6fec2d1230 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -9,7 +9,7 @@ namespace DB { -/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS or SHOW CACHES +/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS or SHOW CACHES or SHOW MERGES */ class ASTShowTablesQuery : public ASTQueryWithOutput { @@ -19,6 +19,7 @@ public: bool cluster = false; bool dictionaries = false; bool m_settings = false; + bool merges = false; bool changed = false; bool temporary = false; bool caches = false; diff --git a/src/Parsers/ParserCheckQuery.cpp b/src/Parsers/ParserCheckQuery.cpp index 505b95bda8e..f70c7cda8d0 100644 --- a/src/Parsers/ParserCheckQuery.cpp +++ b/src/Parsers/ParserCheckQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -13,9 +14,11 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_check_table("CHECK TABLE"); ParserKeyword s_partition("PARTITION"); + ParserKeyword s_part("PART"); ParserToken s_dot(TokenType::Dot); ParserPartition partition_parser; + ParserStringLiteral parser_string_literal; if (!s_check_table.ignore(pos, expected)) return false; @@ -30,6 +33,17 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!partition_parser.parse(pos, query->partition, expected)) return false; } + else if (s_part.ignore(pos, expected)) + { + ASTPtr ast_part_name; + if (!parser_string_literal.parse(pos, ast_part_name, expected)) + return false; + + const auto * ast_literal = ast_part_name->as(); + if (!ast_literal || ast_literal->value.getType() != Field::Types::String) + return false; + query->part_name = ast_literal->value.get(); + } if (query->database) query->children.push_back(query->database); diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 44f375adb65..760ce73cf6f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -29,6 +29,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int SYNTAX_ERROR; } namespace @@ -1342,6 +1343,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_view("VIEW"); ParserKeyword s_materialized("MATERIALIZED"); ParserKeyword s_populate("POPULATE"); + ParserKeyword s_empty("EMPTY"); ParserKeyword s_or_replace("OR REPLACE"); ParserToken s_dot(TokenType::Dot); ParserToken s_lparen(TokenType::OpeningRoundBracket); @@ -1437,8 +1439,26 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (s_populate.ignore(pos, expected)) is_populate = true; - else if (ParserKeyword{"EMPTY"}.ignore(pos, expected)) + else if (s_empty.ignore(pos, expected)) is_create_empty = true; + + if (ParserKeyword{"TO"}.ignore(pos, expected)) + throw Exception( + ErrorCodes::SYNTAX_ERROR, "When creating a materialized view you can't declare both 'ENGINE' and 'TO [db].[table]'"); + } + else + { + if (storage_p.ignore(pos, expected)) + throw Exception( + ErrorCodes::SYNTAX_ERROR, "When creating a materialized view you can't declare both 'TO [db].[table]' and 'ENGINE'"); + + if (s_populate.ignore(pos, expected)) + throw Exception( + ErrorCodes::SYNTAX_ERROR, "When creating a materialized view you can't declare both 'TO [db].[table]' and 'POPULATE'"); + + if (s_empty.ignore(pos, expected)) + throw Exception( + ErrorCodes::SYNTAX_ERROR, 
"When creating a materialized view you can't declare both 'TO [db].[table]' and 'EMPTY'"); } /// AS SELECT ... diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 9a71bc222b5..f03df6cacfe 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowEnginesQuery show_engine_p; ParserShowFunctionsQuery show_functions_p; ParserShowIndexesQuery show_indexes_p; + ParserShowSettingQuery show_setting_p; ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; ParserDescribeTableQuery describe_table_p; @@ -75,6 +77,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || show_engine_p.parse(pos, query, expected) || show_functions_p.parse(pos, query, expected) || show_indexes_p.parse(pos, query, expected) + || show_setting_p.parse(pos, query, expected) || table_p.parse(pos, query, expected) || describe_cache_p.parse(pos, query, expected) || describe_table_p.parse(pos, query, expected) diff --git a/src/Parsers/ParserShowSettingQuery.cpp b/src/Parsers/ParserShowSettingQuery.cpp new file mode 100644 index 00000000000..2586cbdfb43 --- /dev/null +++ b/src/Parsers/ParserShowSettingQuery.cpp @@ -0,0 +1,27 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +bool ParserShowSettingQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword("SHOW SETTING").ignore(pos, expected)) + return false; + + ASTPtr setting_name_identifier; + if (!ParserIdentifier().parse(pos, setting_name_identifier, expected)) + return false; + + node = std::make_shared(getIdentifierName(setting_name_identifier)); + + return true; +} + +} + diff --git a/src/Parsers/ParserShowSettingQuery.h b/src/Parsers/ParserShowSettingQuery.h new file mode 100644 index 00000000000..ef166133d09 --- /dev/null +++ b/src/Parsers/ParserShowSettingQuery.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +/** Parses queries of the form: + * SHOW SETTING [setting_name] + */ +class ParserShowSettingQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW SETTING query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index e3728eb2cd6..2b220dd0a0c 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -27,6 +27,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_dictionaries("DICTIONARIES"); ParserKeyword s_caches("FILESYSTEM CACHES"); ParserKeyword s_settings("SETTINGS"); + ParserKeyword s_merges("MERGES"); ParserKeyword s_changed("CHANGED"); ParserKeyword s_from("FROM"); ParserKeyword s_in("IN"); @@ -98,6 +99,29 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } } + else if (s_merges.ignore(pos, expected)) + { + query->merges = true; + + if (s_not.ignore(pos, expected)) + query->not_like = true; + + if (bool insensitive = s_ilike.ignore(pos, expected); insensitive || s_like.ignore(pos, expected)) + { + if (insensitive) + query->case_insensitive_like = true; + + if (!like_p.parse(pos, like, expected)) + return false; + } + else if (query->not_like) + return false; + if 
(s_limit.ignore(pos, expected)) + { + if (!exp_elem.parse(pos, query->limit_length, expected)) + return false; + } + } else if (s_caches.ignore(pos, expected)) { query->caches = true; diff --git a/src/Parsers/ParserShowTablesQuery.h b/src/Parsers/ParserShowTablesQuery.h index 1b679c2e85a..e21401cf520 100644 --- a/src/Parsers/ParserShowTablesQuery.h +++ b/src/Parsers/ParserShowTablesQuery.h @@ -14,7 +14,7 @@ namespace DB class ParserShowTablesQuery : public IParserBase { protected: - const char * getName() const override { return "SHOW [FULL] [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } + const char * getName() const override { return "SHOW [FULL] [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER|MERGES 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6f83414fc20..d4a0b7bdc7b 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -599,15 +599,20 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres size_t max_streams = settings.max_threads; size_t max_threads_execute_query = settings.max_threads; - /** With distributed query processing, almost no computations are done in the threads, - * but wait and receive data from remote servers. - * If we have 20 remote servers, and max_threads = 8, then it would not be efficient to - * connect and ask only 8 servers at a time. - * To simultaneously query more remote servers, - * instead of max_threads, max_distributed_connections is used. - */ - bool is_remote = table_expression_data.isRemote(); - if (is_remote) + /** + * To simultaneously query more remote servers when async_socket_for_remote is off, + * max_distributed_connections is used instead of max_threads: + * since threads there mostly spend time waiting for data from remote servers, + * we can increase the degree of parallelism to avoid sequential querying of remote servers. + * + * DANGER: this can lead to an insane number of threads working if there are a lot of streams and prefer_localhost_replica is used. + * + * That is not needed when async_socket_for_remote is on, because in that case + * threads are not blocked waiting for data from remote servers. + * + */ + bool is_sync_remote = table_expression_data.isRemote() && !settings.async_socket_for_remote; + if (is_sync_remote) { max_streams = settings.max_distributed_connections; max_threads_execute_query = settings.max_distributed_connections; @@ -647,7 +652,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres max_streams = 1; /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads - if (max_streams > 1 && !is_remote) + if (max_streams > 1 && !is_sync_remote) max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); if (table_node) @@ -841,7 +846,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres * network interaction), it will setMaxThreads earlier and distributed * query will not update it.
*/ - if (!query_plan.getMaxThreads() || is_remote) + if (!query_plan.getMaxThreads() || is_sync_remote) query_plan.setMaxThreads(max_threads_execute_query); query_plan.setConcurrencyControl(settings.use_concurrency_control); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index cd442085eca..3839a8963b2 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -14,8 +14,7 @@ namespace ErrorCodes extern const int POSITION_OUT_OF_BOUND; } -Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) - : columns(std::move(columns_)), num_rows(num_rows_) +Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns_)), num_rows(num_rows_) { checkNumRowsIsConsistent(); } diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index c30586e194e..598a51bf0c7 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -75,7 +75,7 @@ void CompletedPipelineExecutor::execute() if (interactive_timeout_ms) { data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); /// Avoid passing this to lambda, copy ptr to data instead. @@ -105,7 +105,7 @@ void CompletedPipelineExecutor::execute() } else { - PipelineExecutor executor(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); + PipelineExecutor executor(pipeline.processors, pipeline.process_list_element); executor.setReadProgressCallback(pipeline.getReadProgressCallback()); executor.execute(pipeline.getNumThreads(), pipeline.getConcurrencyControl()); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 6a946b4a4b9..27f6a454b24 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -260,6 +260,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue { pid = updated_processors.top(); updated_processors.pop(); + /// In this method we have ownership on node. auto & node = *nodes[pid]; diff --git a/src/Processors/Executors/ExecutionThreadContext.h b/src/Processors/Executors/ExecutionThreadContext.h index 85788a70771..eb048f8ab09 100644 --- a/src/Processors/Executors/ExecutionThreadContext.h +++ b/src/Processors/Executors/ExecutionThreadContext.h @@ -30,12 +30,6 @@ private: /// Callback for read progress. ReadProgressCallback * read_progress_callback = nullptr; - /// Timer that stops optimization of running local tasks instead of queuing them. - /// It provides local progress for each IProcessor task, allowing the partial result of the request to be always sended to the user. - Stopwatch watch; - /// Time period that limits the maximum allowed duration for optimizing the scheduling of local tasks within the executor - const UInt64 partial_result_duration_ms; - public: #ifndef NDEBUG /// Time for different processing stages. 
@@ -68,13 +62,8 @@ public: void setException(std::exception_ptr exception_) { exception = exception_; } void rethrowExceptionIfHas(); - bool needWatchRestartForPartialResultProgress() { return partial_result_duration_ms != 0 && partial_result_duration_ms < watch.elapsedMilliseconds(); } - void restartWatch() { watch.restart(); } - - explicit ExecutionThreadContext(size_t thread_number_, bool profile_processors_, bool trace_processors_, ReadProgressCallback * callback, UInt64 partial_result_duration_ms_) + explicit ExecutionThreadContext(size_t thread_number_, bool profile_processors_, bool trace_processors_, ReadProgressCallback * callback) : read_progress_callback(callback) - , watch(CLOCK_MONOTONIC) - , partial_result_duration_ms(partial_result_duration_ms_) , thread_number(thread_number_) , profile_processors(profile_processors_) , trace_processors(trace_processors_) diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index 08920592391..e61d225a968 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -108,15 +108,8 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea { context.setTask(nullptr); - /// If sending partial results is allowed and local tasks scheduling optimization is repeated longer than the limit - /// or new task need to send partial result later, skip optimization for this iteration. - /// Otherwise take local task from queue if has one. - if ((!queue.empty() && queue.front()->processor->isPartialResultProcessor()) - || context.needWatchRestartForPartialResultProgress()) - { - context.restartWatch(); - } - else if (!queue.empty() && !context.hasAsyncTasks()) + /// Take local task from queue if has one. 
+ if (!queue.empty() && !context.hasAsyncTasks()) { context.setTask(queue.front()); queue.pop(); @@ -146,7 +139,7 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea } } -void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms) +void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback) { num_threads = num_threads_; use_threads = use_threads_; @@ -158,7 +151,7 @@ void ExecutorTasks::init(size_t num_threads_, size_t use_threads_, bool profile_ executor_contexts.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) - executor_contexts.emplace_back(std::make_unique(i, profile_processors, trace_processors, callback, partial_result_duration_ms)); + executor_contexts.emplace_back(std::make_unique(i, profile_processors, trace_processors, callback)); } } diff --git a/src/Processors/Executors/ExecutorTasks.h b/src/Processors/Executors/ExecutorTasks.h index ab6d5e91411..d35f8de94d1 100644 --- a/src/Processors/Executors/ExecutorTasks.h +++ b/src/Processors/Executors/ExecutorTasks.h @@ -58,7 +58,7 @@ public: void tryGetTask(ExecutionThreadContext & context); void pushTasks(Queue & queue, Queue & async_queue, ExecutionThreadContext & context); - void init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback, UInt64 partial_result_duration_ms); + void init(size_t num_threads_, size_t use_threads_, bool profile_processors, bool trace_processors, ReadProgressCallback * callback); void fill(Queue & queue); void upscale(size_t use_threads_); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 77779e2cec2..37af391fba3 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -33,9 +33,8 @@ namespace ErrorCodes } -PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem, UInt64 partial_result_duration_ms_) +PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem) : process_list_element(std::move(elem)) - , partial_result_duration_ms(partial_result_duration_ms_) { if (process_list_element) { @@ -329,7 +328,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ Queue queue; graph->initializeExecution(queue); - tasks.init(num_threads, use_threads, profile_processors, trace_processors, read_progress_callback.get(), partial_result_duration_ms); + tasks.init(num_threads, use_threads, profile_processors, trace_processors, read_progress_callback.get()); tasks.fill(queue); if (num_threads > 1) diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 6cb0e6c4ac1..dee12dad282 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -33,7 +33,7 @@ public: /// During pipeline execution new processors can appear. They will be added to existing set. /// /// Explicit graph representation is built in constructor. Throws if graph is not correct. - explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem, UInt64 partial_result_duration_ms_ = 0); + explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem); ~PipelineExecutor(); /// Execute pipeline in multiple threads. 
Must be called once. @@ -90,9 +90,6 @@ private: ReadProgressCallbackPtr read_progress_callback; - /// Duration between sending partial result through the pipeline - const UInt64 partial_result_duration_ms; - using Queue = std::queue; void initializeExecution(size_t num_threads, bool concurrency_control); /// Initialize executor contexts and task_queue. diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 95a2022bf93..345bec395b2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -41,13 +41,12 @@ struct PullingAsyncPipelineExecutor::Data } }; -PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool has_partial_result_setting) : pipeline(pipeline_) +PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) { if (!pipeline.pulling()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for PullingAsyncPipelineExecutor must be pulling"); - lazy_format = std::make_shared(pipeline.output->getHeader(), /*is_partial_result_protocol_active*/ has_partial_result_setting); - + lazy_format = std::make_shared(pipeline.output->getHeader()); pipeline.complete(lazy_format); } @@ -104,7 +103,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) if (!data) { data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); data->lazy_format = lazy_format.get(); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.h b/src/Processors/Executors/PullingAsyncPipelineExecutor.h index 202ecbf281b..361bcc0155c 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.h @@ -21,7 +21,7 @@ struct ProfileInfo; class PullingAsyncPipelineExecutor { public: - explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_, bool has_partial_result_setting = false); + explicit PullingAsyncPipelineExecutor(QueryPipeline & pipeline_); ~PullingAsyncPipelineExecutor(); /// Get structure of returned block or chunk. 
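Since the executors above no longer take a partial-result flag or duration, a pulling consumer is constructed from the pipeline alone. A rough, non-compilable usage sketch (ClickHouse-internal types; the timeout value and the `process` callback are placeholders, and the empty-chunk-on-timeout behaviour is assumed from typical usage rather than shown in this diff):

/// Sketch: drain a pulling QueryPipeline with the simplified constructor.
PullingAsyncPipelineExecutor executor(pipeline);        /// no has_partial_result_setting argument any more
Chunk chunk;
while (executor.pull(chunk, /* milliseconds = */ 100))  /// pull with a timeout, as in the signature above
{
    if (!chunk.hasRows())
        continue;                                       /// assumed: an empty chunk means the timeout expired without new data
    process(std::move(chunk));                          /// placeholder consumer
}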
diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index f79f15c19bf..cbf73c5cb07 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -44,7 +44,7 @@ bool PullingPipelineExecutor::pull(Chunk & chunk) { if (!executor) { - executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element); executor->setReadProgressCallback(pipeline.getReadProgressCallback()); } diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index f3ed24e7e96..a816ab9ca7f 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -167,7 +167,7 @@ void PushingAsyncPipelineExecutor::start() started = true; data = std::make_unique(); - data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); + data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); data->source = pushing_source.get(); diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index f2b018792c7..696932932df 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -87,7 +87,7 @@ void PushingPipelineExecutor::start() return; started = true; - executor = std::make_shared(pipeline.processors, pipeline.process_list_element, pipeline.partial_result_duration_ms); + executor = std::make_shared(pipeline.processors, pipeline.process_list_element); executor->setReadProgressCallback(pipeline.getReadProgressCallback()); if (!executor->executeStep(&input_wait_flag)) diff --git a/src/Processors/Formats/IInputFormat.cpp b/src/Processors/Formats/IInputFormat.cpp index a87db5a0d4d..031b396679c 100644 --- a/src/Processors/Formats/IInputFormat.cpp +++ b/src/Processors/Formats/IInputFormat.cpp @@ -6,7 +6,7 @@ namespace DB { IInputFormat::IInputFormat(Block header, ReadBuffer * in_) - : ISource(std::move(header)), in(in_) + : SourceWithKeyCondition(std::move(header)), in(in_) { column_mapping = std::make_shared(); } diff --git a/src/Processors/Formats/IInputFormat.h b/src/Processors/Formats/IInputFormat.h index c7b8d97d145..5afc24c9298 100644 --- a/src/Processors/Formats/IInputFormat.h +++ b/src/Processors/Formats/IInputFormat.h @@ -1,10 +1,11 @@ #pragma once -#include -#include +#include #include #include -#include +#include +#include +#include namespace DB @@ -16,7 +17,7 @@ using ColumnMappingPtr = std::shared_ptr; /** Input format is a source, that reads data from ReadBuffer. */ -class IInputFormat : public ISource +class IInputFormat : public SourceWithKeyCondition { protected: @@ -26,10 +27,6 @@ public: /// ReadBuffer can be nullptr for random-access formats. IInputFormat(Block header, ReadBuffer * in_); - /// If the format is used by a SELECT query, this method may be called. - /// The format may use it for filter pushdown. - virtual void setQueryInfo(const SelectQueryInfo &, ContextPtr) {} - /** In some usecase (hello Kafka) we need to read a lot of tiny streams in exactly the same format. 
* The recreating of parser for each small stream takes too long, so we introduce a method * resetParser() which allow to reset the state of parser to continue reading of diff --git a/src/Processors/Formats/IOutputFormat.cpp b/src/Processors/Formats/IOutputFormat.cpp index e691e32a7bc..88a6fb1e92f 100644 --- a/src/Processors/Formats/IOutputFormat.cpp +++ b/src/Processors/Formats/IOutputFormat.cpp @@ -1,89 +1,40 @@ #include #include -#include namespace DB { -IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_, bool is_partial_result_protocol_active_) - : IProcessor({header_, header_, header_, header_}, {}) - , out(out_) - , is_partial_result_protocol_active(is_partial_result_protocol_active_) +IOutputFormat::IOutputFormat(const Block & header_, WriteBuffer & out_) + : IProcessor({header_, header_, header_}, {}), out(out_) { } -void IOutputFormat::setCurrentChunk(InputPort & input, PortKind kind) -{ - current_chunk = input.pull(true); - current_block_kind = kind; - has_input = true; -} - -IOutputFormat::Status IOutputFormat::prepareMainAndPartialResult() -{ - bool need_data = false; - for (auto kind : {Main, PartialResult}) - { - auto & input = getPort(kind); - - if (input.isFinished()) - continue; - - if (kind == PartialResult && main_input_activated) - { - input.close(); - continue; - } - - input.setNeeded(); - need_data = true; - - if (!input.hasData()) - continue; - - setCurrentChunk(input, kind); - return Status::Ready; - } - - if (need_data) - return Status::NeedData; - - return Status::Finished; -} - -IOutputFormat::Status IOutputFormat::prepareTotalsAndExtremes() -{ - for (auto kind : {Totals, Extremes}) - { - auto & input = getPort(kind); - - if (!input.isConnected() || input.isFinished()) - continue; - - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - - setCurrentChunk(input, kind); - return Status::Ready; - } - - return Status::Finished; -} - IOutputFormat::Status IOutputFormat::prepare() { if (has_input) return Status::Ready; - auto status = prepareMainAndPartialResult(); - if (status != Status::Finished) - return status; + for (auto kind : {Main, Totals, Extremes}) + { + auto & input = getPort(kind); - status = prepareTotalsAndExtremes(); - if (status != Status::Finished) - return status; + if (kind != Main && !input.isConnected()) + continue; + + if (input.isFinished()) + continue; + + input.setNeeded(); + + if (!input.hasData()) + return Status::NeedData; + + current_chunk = input.pull(true); + current_block_kind = kind; + has_input = true; + return Status::Ready; + } finished = true; @@ -132,18 +83,8 @@ void IOutputFormat::work() case Main: result_rows += current_chunk.getNumRows(); result_bytes += current_chunk.allocatedBytes(); - if (is_partial_result_protocol_active && !main_input_activated && current_chunk.hasRows()) - { - /// Sending an empty block signals to the client that partial results are terminated, - /// and only data from the main pipeline will be forwarded. 
- consume(Chunk(current_chunk.cloneEmptyColumns(), 0)); - main_input_activated = true; - } consume(std::move(current_chunk)); break; - case PartialResult: - consumePartialResult(std::move(current_chunk)); - break; case Totals: writeSuffixIfNeeded(); if (auto totals = prepareTotals(std::move(current_chunk))) @@ -178,15 +119,6 @@ void IOutputFormat::write(const Block & block) flush(); } -void IOutputFormat::writePartialResult(const Block & block) -{ - writePrefixIfNeeded(); - consumePartialResult(Chunk(block.getColumns(), block.rows())); - - if (auto_flush) - flush(); -} - void IOutputFormat::finalize() { if (finalized) diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index e642132fb64..cae2ab7691e 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -23,9 +23,9 @@ class WriteBuffer; class IOutputFormat : public IProcessor { public: - enum PortKind { Main = 0, Totals = 1, Extremes = 2, PartialResult = 3 }; + enum PortKind { Main = 0, Totals = 1, Extremes = 2 }; - IOutputFormat(const Block & header_, WriteBuffer & out_, bool is_partial_result_protocol_active_ = false); + IOutputFormat(const Block & header_, WriteBuffer & out_); Status prepare() override; void work() override; @@ -54,7 +54,6 @@ public: /// TODO: separate formats and processors. void write(const Block & block); - void writePartialResult(const Block & block); void finalize(); @@ -122,7 +121,6 @@ protected: virtual void consume(Chunk) = 0; virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} - virtual void consumePartialResult(Chunk) {} virtual void finalizeImpl() {} virtual void finalizeBuffers() {} virtual void writePrefix() {} @@ -176,7 +174,6 @@ protected: Chunk current_chunk; PortKind current_block_kind = PortKind::Main; - bool main_input_activated = false; bool has_input = false; bool finished = false; bool finalized = false; @@ -191,15 +188,9 @@ protected: Statistics statistics; private: - void setCurrentChunk(InputPort & input, PortKind kind); - IOutputFormat::Status prepareMainAndPartialResult(); - IOutputFormat::Status prepareTotalsAndExtremes(); - size_t rows_read_before = 0; bool are_totals_written = false; - bool is_partial_result_protocol_active = false; - /// Counters for consumed chunks. Are used for QueryLog. 
size_t result_rows = 0; size_t result_bytes = 0; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 33f108f5ad7..aadb1e9e1d6 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -160,9 +160,12 @@ static void insertNumber(IColumn & column, WhichDataType type, T value) } template -static AvroDeserializer::DeserializeFn createDecimalDeserializeFn(const avro::NodePtr & root_node, const DataTypePtr & target_type) +static AvroDeserializer::DeserializeFn createDecimalDeserializeFn(const avro::NodePtr & root_node, const DataTypePtr & target_type, bool is_fixed) { auto logical_type = root_node->logicalType(); + size_t fixed_size = 0; + if (is_fixed) + fixed_size = root_node->fixedSize(); const auto & decimal_type = assert_cast(*target_type); if (decimal_type.getScale() != static_cast(logical_type.scale()) || decimal_type.getPrecision() != static_cast(logical_type.precision())) throw Exception( @@ -174,14 +177,18 @@ static AvroDeserializer::DeserializeFn createDecimalDeserializeFn(const avro::No decimal_type.getScale(), decimal_type.getPrecision()); - return [tmp = std::string(), target_type](IColumn & column, avro::Decoder & decoder) mutable + return [tmp = std::vector(), target_type, fixed_size](IColumn & column, avro::Decoder & decoder) mutable { static constexpr size_t field_type_size = sizeof(typename DecimalType::FieldType); - decoder.decodeString(tmp); - if (tmp.size() > field_type_size) + if (fixed_size) + tmp = decoder.decodeFixed(fixed_size); + else + tmp = decoder.decodeBytes(); + + if (tmp.size() > field_type_size || tmp.empty()) throw ParsingException( ErrorCodes::CANNOT_PARSE_UUID, - "Cannot parse type {}, expected binary data with size equal to or less than {}, got {}", + "Cannot parse type {}, expected non-empty binary data with size equal to or less than {}, got {}", target_type->getName(), field_type_size, tmp.size()); @@ -189,10 +196,12 @@ static AvroDeserializer::DeserializeFn createDecimalDeserializeFn(const avro::No { /// Extent value to required size by adding padding. /// Check if value is negative or positive. 
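The padding added below is ordinary two's-complement sign extension of a big-endian byte string (which is how Avro encodes decimal values). A self-contained illustration of the same idea, using a hypothetical helper and an int64 target instead of the Decimal field types used here:

#include <cstdint>
#include <vector>

/// Hypothetical helper: interpret 1..8 big-endian two's-complement bytes as an int64.
int64_t signExtendBigEndian(std::vector<uint8_t> bytes)
{
    /// Pad the front with 0xff for negative values and 0x00 otherwise,
    /// mirroring the `tmp[0] & 128` check in the code that follows.
    const uint8_t filler = (bytes[0] & 0x80) ? 0xff : 0x00;
    bytes.insert(bytes.begin(), sizeof(int64_t) - bytes.size(), filler);

    uint64_t acc = 0;
    for (uint8_t b : bytes)
        acc = (acc << 8) | b;              /// most significant byte first
    return static_cast<int64_t>(acc);      /// exact two's-complement conversion since C++20
}

/// signExtendBigEndian({0xff}) == -1; signExtendBigEndian({0x01, 0x00}) == 256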
+ std::vector padding; if (tmp[0] & 128) - tmp = std::string(field_type_size - tmp.size(), 0xff) + tmp; + padding = std::vector(field_type_size - tmp.size(), 0xff); else - tmp = std::string(field_type_size - tmp.size(), 0) + tmp; + padding = std::vector(field_type_size - tmp.size(), 0); + tmp.insert(tmp.begin(), padding.begin(), padding.end()); } typename DecimalType::FieldType field; @@ -282,15 +291,15 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro }; } if (target.isDecimal32()) - return createDecimalDeserializeFn(root_node, target_type); + return createDecimalDeserializeFn(root_node, target_type, false); if (target.isDecimal64()) - return createDecimalDeserializeFn(root_node, target_type); + return createDecimalDeserializeFn(root_node, target_type, false); if (target.isDecimal128()) - return createDecimalDeserializeFn(root_node, target_type); + return createDecimalDeserializeFn(root_node, target_type, false); if (target.isDecimal256()) - return createDecimalDeserializeFn(root_node, target_type); + return createDecimalDeserializeFn(root_node, target_type, false); if (target.isDateTime64()) - return createDecimalDeserializeFn(root_node, target_type); + return createDecimalDeserializeFn(root_node, target_type, false); break; case avro::AVRO_INT: if (target_type->isValueRepresentedByNumber()) @@ -515,6 +524,29 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro return true; }; } + if (target.isUUID()) + { + return [tmp = std::vector(), fixed_size](IColumn & column, avro::Decoder & decoder) mutable + { + decoder.decodeFixed(fixed_size, tmp); + if (tmp.size() != 36) + throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse UUID from type Fixed, because it's size ({}) is not equal to the size of UUID (36)", fixed_size); + + const UUID uuid = parseUUID({reinterpret_cast(tmp.data()), tmp.size()}); + assert_cast(column).insertValue(uuid); + return true; + }; + } + if (target.isDecimal32()) + return createDecimalDeserializeFn(root_node, target_type, true); + if (target.isDecimal64()) + return createDecimalDeserializeFn(root_node, target_type, true); + if (target.isDecimal128()) + return createDecimalDeserializeFn(root_node, target_type, true); + if (target.isDecimal256()) + return createDecimalDeserializeFn(root_node, target_type, true); + if (target.isDateTime64()) + return createDecimalDeserializeFn(root_node, target_type, true); break; } case avro::AVRO_SYMBOLIC: @@ -1210,7 +1242,16 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ClickHouse supports only 8 and 16-bit Enum."); } case avro::Type::AVRO_FIXED: + { + auto logical_type = node->logicalType(); + if (logical_type.type() == avro::LogicalType::UUID) + return std::make_shared(); + + if (logical_type.type() == avro::LogicalType::DECIMAL) + return createDecimal(logical_type.precision(), logical_type.scale()); + return std::make_shared(node->fixedSize()); + } case avro::Type::AVRO_ARRAY: return std::make_shared(avroNodeToDataType(node->leafAt(0))); case avro::Type::AVRO_NULL: diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 1b6cde11be7..b346ef3d232 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -14,6 +14,7 @@ # include # include # include +# include # include # include # include @@ -28,7 +29,9 @@ # include # include # 
include +# include # include +# include # include "ArrowBufferedStreams.h" @@ -97,6 +100,15 @@ std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in return std::make_unique(std::move(file_data), file_size); } +static const orc::Type * getORCTypeByName(const orc::Type & schema, const String & name, bool case_insensitive_column_matching) +{ + for (uint64_t i = 0; i != schema.getSubtypeCount(); ++i) + if (boost::equals(schema.getFieldName(i), name) + || (case_insensitive_column_matching && boost::iequals(schema.getFieldName(i), name))) + return schema.getSubtype(i); + return nullptr; +} + static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_with_unsupported_types, bool & skipped) { assert(orc_type != nullptr); @@ -122,6 +134,8 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi return std::make_shared(); case orc::TypeKind::TIMESTAMP: return std::make_shared(9); + case orc::TypeKind::TIMESTAMP_INSTANT: + return std::make_shared(9, "UTC"); case orc::TypeKind::VARCHAR: case orc::TypeKind::BINARY: case orc::TypeKind::STRING: @@ -197,6 +211,473 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi } } +static std::optional convertORCTypeToPredicateType(const orc::Type & orc_type) +{ + switch (orc_type.getKind()) + { + case orc::BOOLEAN: + return orc::PredicateDataType::BOOLEAN; + case orc::BYTE: + case orc::SHORT: + case orc::INT: + case orc::LONG: + return orc::PredicateDataType::LONG; + case orc::FLOAT: + case orc::DOUBLE: + return orc::PredicateDataType::FLOAT; + case orc::VARCHAR: + case orc::CHAR: + case orc::STRING: + return orc::PredicateDataType::STRING; + case orc::DATE: + return orc::PredicateDataType::DATE; + case orc::TIMESTAMP: + return orc::PredicateDataType::TIMESTAMP; + case orc::DECIMAL: + return orc::PredicateDataType::DECIMAL; + default: + return {}; + } +} + +static String getColumnNameFromKeyCondition(const KeyCondition & key_condition, size_t indice) +{ + const auto & key_columns = key_condition.getKeyColumns(); + for (const auto & [name, i] : key_columns) + { + if (i == indice) + return name; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't get column from KeyCondition with indice {}", indice); +} + +static std::optional +convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTypePtr type_hint = nullptr) +{ + try + { + /// We always fallback to return null if possible CH type hint not consistent with ORC type + switch (orc_type.getKind()) + { + case orc::BOOLEAN: { + /// May throw exception + auto val = field.get(); + return orc::Literal(val != 0); + } + case orc::BYTE: + case orc::SHORT: + case orc::INT: + case orc::LONG: { + /// May throw exception + auto val = field.get(); + return orc::Literal(val); + } + case orc::FLOAT: + case orc::DOUBLE: { + Float64 val; + if (field.tryGet(val)) + return orc::Literal(val); + break; + } + case orc::VARCHAR: + case orc::CHAR: + case orc::STRING: { + String str; + if (field.tryGet(str)) + return orc::Literal(str.data(), str.size()); + break; + } + case orc::DATE: { + Int64 val; + if (field.tryGet(val)) + return orc::Literal(orc::PredicateDataType::DATE, val); + break; + } + case orc::TIMESTAMP: { + if (type_hint && isDateTime64(type_hint)) + { + const auto * datetime64_type = typeid_cast(type_hint.get()); + if (datetime64_type->getScale() != 9) + return std::nullopt; + } + + DecimalField ts; + if (field.tryGet(ts)) + { + Int64 secs = (ts.getValue() / ts.getScaleMultiplier()).convertTo(); + Int32 nanos = 
(ts.getValue() - (ts.getValue() / ts.getScaleMultiplier()) * ts.getScaleMultiplier()).convertTo(); + return orc::Literal(secs, nanos); + } + break; + } + case orc::DECIMAL: { + auto precision = orc_type.getPrecision(); + if (precision == 0) + precision = 38; + + if (precision <= DecimalUtils::max_precision) + { + DecimalField val; + if (field.tryGet(val)) + { + Int64 right = val.getValue().convertTo(); + return orc::Literal( + orc::Int128(right), static_cast(orc_type.getPrecision()), static_cast(orc_type.getScale())); + } + } + else if (precision <= DecimalUtils::max_precision) + { + DecimalField val; + if (field.tryGet(val)) + { + Int64 right = val.getValue().convertTo(); + return orc::Literal( + orc::Int128(right), static_cast(orc_type.getPrecision()), static_cast(orc_type.getScale())); + } + } + else if (precision <= DecimalUtils::max_precision) + { + DecimalField val; + if (field.tryGet(val)) + { + Int64 high = val.getValue().value.items[1]; + UInt64 low = static_cast(val.getValue().value.items[0]); + return orc::Literal( + orc::Int128(high, low), static_cast(orc_type.getPrecision()), static_cast(orc_type.getScale())); + } + } + break; + } + default: + break; + } + return std::nullopt; + } + catch (Exception &) + { + return std::nullopt; + } +} + +/// Attention: evaluateRPNElement is only invoked in buildORCSearchArgumentImpl. +/// So it is guaranteed that: +/// 1. elem has no monotonic_functions_chains. +/// 2. if elem function is FUNCTION_IN_RANGE/FUNCTION_NOT_IN_RANGE, `set_index` is not null and `set_index->getOrderedSet().size()` is 1. +/// 3. elem function should be FUNCTION_IN_RANGE/FUNCTION_NOT_IN_RANGE/FUNCTION_IN_SET/FUNCTION_NOT_IN_SET/FUNCTION_IS_NULL/FUNCTION_IS_NOT_NULL +static bool evaluateRPNElement(const Field & field, const KeyCondition::RPNElement & elem) +{ + Range key_range(field); + switch (elem.function) + { + case KeyCondition::RPNElement::FUNCTION_IN_RANGE: + case KeyCondition::RPNElement::FUNCTION_NOT_IN_RANGE: { + /// Rows with null values should never output when filters like ">=", ">", "<=", "<", '=' are applied + if (field.isNull()) + return false; + + bool res = elem.range.intersectsRange(key_range); + if (elem.function == KeyCondition::RPNElement::FUNCTION_NOT_IN_RANGE) + res = !res; + return res; + } + case KeyCondition::RPNElement::FUNCTION_IN_SET: + case KeyCondition::RPNElement::FUNCTION_NOT_IN_SET: { + const auto & set_index = elem.set_index; + const auto & ordered_set = set_index->getOrderedSet(); + const auto & set_column = ordered_set[0]; + + bool res = false; + for (size_t i = 0; i < set_column->size(); ++i) + { + if (Range::equals(field, (*set_column)[i])) + { + res = true; + break; + } + } + + if (elem.function == KeyCondition::RPNElement::FUNCTION_NOT_IN_SET) + res = !res; + return res; + } + case KeyCondition::RPNElement::FUNCTION_IS_NULL: + case KeyCondition::RPNElement::FUNCTION_IS_NOT_NULL: { + if (field.isNull()) + return elem.function == KeyCondition::RPNElement::FUNCTION_IS_NULL; + else + return elem.function == KeyCondition::RPNElement::FUNCTION_IS_NOT_NULL; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected RPNElement Function {}", elem.toString()); + } +} + +static void buildORCSearchArgumentImpl( + const KeyCondition & key_condition, + const Block & header, + const orc::Type & schema, + KeyCondition::RPN & rpn_stack, + orc::SearchArgumentBuilder & builder, + const FormatSettings & format_settings) +{ + if (rpn_stack.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty rpn stack in 
buildORCSearchArgumentImpl"); + + const auto & curr = rpn_stack.back(); + switch (curr.function) + { + case KeyCondition::RPNElement::FUNCTION_IN_RANGE: + case KeyCondition::RPNElement::FUNCTION_NOT_IN_RANGE: + case KeyCondition::RPNElement::FUNCTION_IN_SET: + case KeyCondition::RPNElement::FUNCTION_NOT_IN_SET: + case KeyCondition::RPNElement::FUNCTION_IS_NULL: + case KeyCondition::RPNElement::FUNCTION_IS_NOT_NULL: { + const bool need_wrap_not = curr.function == KeyCondition::RPNElement::FUNCTION_IS_NOT_NULL + || curr.function == KeyCondition::RPNElement::FUNCTION_NOT_IN_RANGE + || curr.function == KeyCondition::RPNElement::FUNCTION_NOT_IN_SET; + const bool contains_is_null = curr.function == KeyCondition::RPNElement::FUNCTION_IS_NULL + || curr.function == KeyCondition::RPNElement::FUNCTION_IS_NOT_NULL; + const bool contains_in_set = curr.function == KeyCondition::RPNElement::FUNCTION_IN_SET + || curr.function == KeyCondition::RPNElement::FUNCTION_NOT_IN_SET; + const bool contains_in_range = curr.function == KeyCondition::RPNElement::FUNCTION_IN_RANGE + || curr.function == KeyCondition::RPNElement::FUNCTION_NOT_IN_RANGE; + + SCOPE_EXIT({rpn_stack.pop_back();}); + + + /// Key filter expressions like "func(col) > 100" are not supported for ORC filter push down + if (!curr.monotonic_functions_chain.empty()) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + + /// key filter expressions like "(a, b, c) in " or "(func(a), b) in " are not supported for ORC filter push down + /// Only expressions like "a in " are supported currently, maybe we can improve it later. + auto set_index = curr.set_index; + if (contains_in_set) + { + if (!set_index || set_index->getOrderedSet().size() != 1 || set_index->hasMonotonicFunctionsChain()) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + } + + String column_name = getColumnNameFromKeyCondition(key_condition, curr.key_column); + const auto * orc_type = getORCTypeByName(schema, column_name, format_settings.orc.case_insensitive_column_matching); + if (!orc_type) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + + /// Make sure key column in header has exactly the same type with key column in ORC file schema + /// Counter-example 1: + /// Column a has type "Nullable(Int64)" in ORC file, but in header column a has type "Int64", which is allowed in CH. + /// For queries with where condition like "a is null", if a column contains null value, pushing or not pushing down filters + /// would result in different outputs. + /// Counter-example 2: + /// Column a has type "Nullable(Int64)" in ORC file, but in header column a has type "Nullable(UInt64)". + /// For queries with where condition like "a > 10", if a column contains negative values such as "-1", pushing or not pushing + /// down filters would result in different outputs. 
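For orientation, this is roughly the kind of search argument the RPN walk below assembles. A hand-written sketch for a predicate like `x > 10 AND y IS NULL`, using only builder calls that already appear in this function (it needs the Apache ORC C++ headers; the column ids and the use of LONG for both columns are illustrative, and the real code derives everything from the KeyCondition RPN and the ORC schema):

auto builder = orc::SearchArgumentFactory::newBuilder();
builder->startAnd();
/// x > 10 becomes NOT (x <= 10), matching the startNot()/lessThanEquals() transformation below
builder->startNot();
builder->lessThanEquals(/* column id */ 1, orc::PredicateDataType::LONG, orc::Literal(static_cast<int64_t>(10)));
builder->end();
/// y IS NULL
builder->isNull(/* column id */ 2, orc::PredicateDataType::LONG);
builder->end();   /// closes startAnd()
auto search_argument = builder->build();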
+ bool skipped = false; + auto expect_type = makeNullableRecursively(parseORCType(orc_type, true, skipped)); + const ColumnWithTypeAndName * column = header.findByName(column_name, format_settings.orc.case_insensitive_column_matching); + if (!expect_type || !column) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + + auto nested_type = removeNullable(recursiveRemoveLowCardinality(column->type)); + auto expect_nested_type = removeNullable(expect_type); + if (!nested_type->equals(*expect_nested_type)) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + + /// If null_as_default is true, the only difference is nullable, and the evaluations of current RPNElement based on default and null field + /// have the same result, we still should push down current filter. + if (format_settings.null_as_default && !column->type->isNullable() && !column->type->isLowCardinalityNullable()) + { + bool match_if_null = evaluateRPNElement({}, curr); + bool match_if_default = evaluateRPNElement(column->type->getDefault(), curr); + if (match_if_default != match_if_null) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + } + + auto predicate_type = convertORCTypeToPredicateType(*orc_type); + if (!predicate_type.has_value()) + { + builder.literal(orc::TruthValue::YES_NO_NULL); + break; + } + + if (need_wrap_not) + builder.startNot(); + + if (contains_is_null) + { + builder.isNull(orc_type->getColumnId(), *predicate_type); + } + else if (contains_in_range) + { + const auto & range = curr.range; + bool has_left_bound = !range.left.isNegativeInfinity(); + bool has_right_bound = !range.right.isPositiveInfinity(); + if (!has_left_bound && !has_right_bound) + { + /// Transform whole range orc::TruthValue::YES_NULL + builder.literal(orc::TruthValue::YES_NULL); + } + else if (has_left_bound && has_right_bound && range.left_included && range.right_included && range.left == range.right) + { + /// Transform range with the same left bound and right bound to equal, which could utilize bloom filters in ORC + auto literal = convertFieldToORCLiteral(*orc_type, range.left); + if (literal.has_value()) + builder.equals(orc_type->getColumnId(), *predicate_type, *literal); + else + builder.literal(orc::TruthValue::YES_NO_NULL); + } + else + { + std::optional left_literal; + if (has_left_bound) + left_literal = convertFieldToORCLiteral(*orc_type, range.left); + + std::optional right_literal; + if (has_right_bound) + right_literal = convertFieldToORCLiteral(*orc_type, range.right); + + if (has_left_bound && has_right_bound) + builder.startAnd(); + + if (has_left_bound) + { + if (left_literal.has_value()) + { + /// >= is transformed to not < and > is transformed to not <= + builder.startNot(); + if (range.left_included) + builder.lessThan(orc_type->getColumnId(), *predicate_type, *left_literal); + else + builder.lessThanEquals(orc_type->getColumnId(), *predicate_type, *left_literal); + builder.end(); + } + else + builder.literal(orc::TruthValue::YES_NO_NULL); + } + + if (has_right_bound) + { + if (right_literal.has_value()) + { + if (range.right_included) + builder.lessThanEquals(orc_type->getColumnId(), *predicate_type, *right_literal); + else + builder.lessThan(orc_type->getColumnId(), *predicate_type, *right_literal); + } + else + builder.literal(orc::TruthValue::YES_NO_NULL); + } + + if (has_left_bound && has_right_bound) + builder.end(); + } + } + else if (contains_in_set) + { + /// Build literals from MergeTreeSetIndex + const auto & ordered_set = set_index->getOrderedSet(); + const 
auto & set_column = ordered_set[0]; + + bool fail = false; + std::vector literals; + literals.reserve(set_column->size()); + for (size_t i = 0; i < set_column->size(); ++i) + { + auto literal = convertFieldToORCLiteral(*orc_type, (*set_column)[i]); + if (!literal.has_value()) + { + fail = true; + break; + } + + literals.emplace_back(*literal); + } + + /// set has zero element + if (literals.empty()) + builder.literal(orc::TruthValue::YES); + else if (fail) + builder.literal(orc::TruthValue::YES_NO_NULL); + else + builder.in(orc_type->getColumnId(), *predicate_type, literals); + } + + if (need_wrap_not) + builder.end(); + + break; + } + case KeyCondition::RPNElement::FUNCTION_UNKNOWN: { + builder.literal(orc::TruthValue::YES_NO_NULL); + rpn_stack.pop_back(); + break; + } + case KeyCondition::RPNElement::FUNCTION_NOT: { + builder.startNot(); + rpn_stack.pop_back(); + buildORCSearchArgumentImpl(key_condition, header, schema, rpn_stack, builder, format_settings); + builder.end(); + break; + } + case KeyCondition::RPNElement::FUNCTION_AND: { + builder.startAnd(); + rpn_stack.pop_back(); + buildORCSearchArgumentImpl(key_condition, header, schema, rpn_stack, builder, format_settings); + buildORCSearchArgumentImpl(key_condition, header, schema, rpn_stack, builder, format_settings); + builder.end(); + break; + } + case KeyCondition::RPNElement::FUNCTION_OR: { + builder.startOr(); + rpn_stack.pop_back(); + buildORCSearchArgumentImpl(key_condition, header, schema, rpn_stack, builder, format_settings); + buildORCSearchArgumentImpl(key_condition, header, schema, rpn_stack, builder, format_settings); + builder.end(); + break; + } + case KeyCondition::RPNElement::ALWAYS_FALSE: { + builder.literal(orc::TruthValue::NO); + rpn_stack.pop_back(); + break; + } + case KeyCondition::RPNElement::ALWAYS_TRUE: { + builder.literal(orc::TruthValue::YES); + rpn_stack.pop_back(); + break; + } + } +} + +std::unique_ptr +buildORCSearchArgument(const KeyCondition & key_condition, const Block & header, const orc::Type & schema, const FormatSettings & format_settings) +{ + auto rpn_stack = key_condition.getRPN(); + if (rpn_stack.empty()) + return nullptr; + + auto builder = orc::SearchArgumentFactory::newBuilder(); + buildORCSearchArgumentImpl(key_condition, header, schema, rpn_stack, *builder, format_settings); + return builder->build(); +} + static void getFileReaderAndSchema( ReadBuffer & in, @@ -255,6 +736,11 @@ void NativeORCBlockInputFormat::prepareFileReader() if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? 
boost::to_lower_copy(name) : name)) include_indices.push_back(static_cast(i)); } + + if (format_settings.orc.filter_push_down && key_condition && !sarg) + { + sarg = buildORCSearchArgument(*key_condition, getPort().getHeader(), file_reader->getType(), format_settings); + } } bool NativeORCBlockInputFormat::prepareStripeReader() @@ -276,11 +762,12 @@ bool NativeORCBlockInputFormat::prepareStripeReader() orc::RowReaderOptions row_reader_options; row_reader_options.include(include_indices); row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); + if (format_settings.orc.filter_push_down && sarg) + { + row_reader_options.searchArgument(sarg); + } + stripe_reader = file_reader->createRowReader(row_reader_options); - - if (!batch) - batch = stripe_reader->createRowBatch(format_settings.orc.row_batch_size); - return true; } @@ -312,6 +799,9 @@ Chunk NativeORCBlockInputFormat::generate() if (is_stopped) return {}; + /// TODO: figure out why reuse batch would cause asan fatals in https://s3.amazonaws.com/clickhouse-test-reports/55330/be39d23af2d7e27f5ec7f168947cf75aeaabf674/stateless_tests__asan__[4_4].html + /// Not sure if it is a false positive case. Notice that reusing batch will speed up reading ORC by 1.15x. + auto batch = stripe_reader->createRowBatch(format_settings.orc.row_batch_size); while (true) { bool ok = stripe_reader->next(*batch); @@ -339,7 +829,7 @@ void NativeORCBlockInputFormat::resetParser() file_reader.reset(); stripe_reader.reset(); include_indices.clear(); - batch.reset(); + sarg.reset(); block_missing_values.clear(); } @@ -496,16 +986,21 @@ readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::T const auto * orc_str_column = dynamic_cast(orc_column); size_t reserver_size = 0; for (size_t i = 0; i < orc_str_column->numElements; ++i) - reserver_size += orc_str_column->length[i] + 1; + { + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) + reserver_size += orc_str_column->length[i]; + reserver_size += 1; + } + column_chars_t.reserve(reserver_size); column_offsets.reserve(orc_str_column->numElements); size_t curr_offset = 0; for (size_t i = 0; i < orc_str_column->numElements; ++i) { - const auto * buf = orc_str_column->data[i]; - if (buf) + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) { + const auto * buf = orc_str_column->data[i]; size_t buf_size = orc_str_column->length[i]; column_chars_t.insert_assume_reserved(buf, buf + buf_size); curr_offset += buf_size; @@ -531,7 +1026,7 @@ readColumnWithFixedStringData(const orc::ColumnVectorBatch * orc_column, const o const auto * orc_str_column = dynamic_cast(orc_column); for (size_t i = 0; i < orc_str_column->numElements; ++i) { - if (orc_str_column->data[i]) + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) column_chars_t.insert_assume_reserved(orc_str_column->data[i], orc_str_column->data[i] + orc_str_column->length[i]); else column_chars_t.resize_fill(column_chars_t.size() + fixed_len); @@ -580,7 +1075,7 @@ readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const or for (size_t i = 0; i < orc_str_column->numElements; ++i) { /// If at least one value size is not 16 bytes, fallback to reading String column and further cast to IPv6. 
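The string/binary readers changed above and below switch from checking data[i] to the hasNulls/notNull mask of the ORC vector batch. A short sketch (not from the patch, assuming the orc::StringVectorBatch layout whose fields numElements, hasNulls, notNull, data and length are used in this file) of the resulting pattern:

    const auto * str_batch = dynamic_cast<const orc::StringVectorBatch *>(orc_column);
    for (size_t i = 0; i < str_batch->numElements; ++i)
    {
        if (!str_batch->hasNulls || str_batch->notNull[i])
        {
            // Non-null slot: data[i]/length[i] are valid (length[i] may be 0 for an empty string).
            std::string_view value(str_batch->data[i], static_cast<size_t>(str_batch->length[i]));
            // ... insert value into the ClickHouse column ...
        }
        else
        {
            // Null slot: do not touch data[i]; insert a default value / NULL instead.
        }
    }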
- if (orc_str_column->data[i] && orc_str_column->length[i] != sizeof(IPv6)) + if ((!orc_str_column->hasNulls || orc_str_column->notNull[i]) && orc_str_column->length[i] != sizeof(IPv6)) return readColumnWithStringData(orc_column, orc_type, column_name); } @@ -591,10 +1086,10 @@ readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const or for (size_t i = 0; i < orc_str_column->numElements; ++i) { - if (!orc_str_column->data[i]) [[unlikely]] - ipv6_column.insertDefault(); - else + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) ipv6_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); + else + ipv6_column.insertDefault(); } return {std::move(internal_column), std::move(internal_type), column_name}; @@ -628,9 +1123,7 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData( for (size_t i = 0; i < orc_str_column->numElements; ++i) { - if (!orc_str_column->data[i]) [[unlikely]] - integer_column.insertDefault(); - else + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) { if (sizeof(typename ColumnType::ValueType) != orc_str_column->length[i]) throw Exception( @@ -642,6 +1135,10 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData( integer_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); } + else + { + integer_column.insertDefault(); + } } return {std::move(internal_column), column_type, column_name}; } @@ -795,7 +1292,8 @@ static ColumnWithTypeAndName readColumnFromORCColumn( return readColumnWithNumericData(orc_column, orc_type, column_name); case orc::DATE: return readColumnWithDateData(orc_column, orc_type, column_name, type_hint); - case orc::TIMESTAMP: + case orc::TIMESTAMP: [[fallthrough]]; + case orc::TIMESTAMP_INSTANT: return readColumnWithTimestampData(orc_column, orc_type, column_name); case orc::DECIMAL: { auto interal_type = parseORCType(orc_type, false, skipped); diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h index 3326999f0aa..6ea7a063e0d 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h @@ -6,6 +6,8 @@ # include # include # include +# include +# include # include namespace DB @@ -42,6 +44,8 @@ std::unique_ptr asORCInputStream(ReadBuffer & in, const Format // Reads the whole file into a memory buffer, owned by the returned RandomAccessFile. 
std::unique_ptr asORCInputStreamLoadIntoMemory(ReadBuffer & in, std::atomic & is_cancelled); +std::unique_ptr buildORCSearchArgument( + const KeyCondition & key_condition, const Block & header, const orc::Type & schema, const FormatSettings & format_settings); class ORCColumnToCHColumn; class NativeORCBlockInputFormat : public IInputFormat @@ -69,7 +73,8 @@ private: std::unique_ptr file_reader; std::unique_ptr stripe_reader; std::unique_ptr orc_column_to_ch_column; - std::unique_ptr batch; + + std::shared_ptr sarg; // indices of columns to read from ORC file std::list include_indices; diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 827752d9db0..1e36c100667 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -426,14 +426,14 @@ void ORCBlockOutputFormat::writeColumn( const auto * timestamp_type = assert_cast(type.get()); UInt32 scale = timestamp_type->getScale(); writeDateTimes( - orc_column, - column, null_bytemap, - [scale](UInt64 value){ return value / std::pow(10, scale); }, - [scale](UInt64 value){ return (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); }); + orc_column, + column, + null_bytemap, + [scale](Int64 value) { return value / Int64(std::pow(10, scale)); }, + [scale](Int64 value) { return (value % Int64(std::pow(10, scale))) * Int64(std::pow(10, 9 - scale)); }); break; } - case TypeIndex::Decimal32: - { + case TypeIndex::Decimal32: { writeDecimals( orc_column, column, @@ -608,6 +608,7 @@ void ORCBlockOutputFormat::prepareWriter() const Block & header = getPort(PortKind::Main).getHeader(); schema = orc::createStructType(); options.setCompression(getORCCompression(format_settings.orc.output_compression_method)); + options.setRowIndexStride(format_settings.orc.output_row_index_stride); size_t columns_count = header.columns(); for (size_t i = 0; i != columns_count; ++i) schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]))); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index c947eda42c7..3cfeb80afd5 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -387,16 +387,6 @@ ParquetBlockInputFormat::~ParquetBlockInputFormat() pool->wait(); } -void ParquetBlockInputFormat::setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context) -{ - /// When analyzer is enabled, query_info.filter_asts is missing sets and maybe some type casts, - /// so don't use it. 
I'm not sure how to support analyzer here: https://github.com/ClickHouse/ClickHouse/issues/53536 - if (format_settings.parquet.filter_push_down && !context->getSettingsRef().allow_experimental_analyzer) - key_condition.emplace(query_info, context, getPort().getHeader().getNames(), - std::make_shared(std::make_shared( - getPort().getHeader().getColumnsWithTypeAndName()))); -} - void ParquetBlockInputFormat::initializeIfNeeded() { if (std::exchange(is_initialized, true)) @@ -428,10 +418,12 @@ void ParquetBlockInputFormat::initializeIfNeeded() if (skip_row_groups.contains(row_group)) continue; - if (key_condition.has_value() && - !key_condition->checkInHyperrectangle( - getHyperrectangleForRowGroup(*metadata, row_group, getPort().getHeader(), format_settings), - getPort().getHeader().getDataTypes()).can_be_true) + if (format_settings.parquet.filter_push_down && key_condition + && !key_condition + ->checkInHyperrectangle( + getHyperrectangleForRowGroup(*metadata, row_group, getPort().getHeader(), format_settings), + getPort().getHeader().getDataTypes()) + .can_be_true) continue; if (row_group_batches.empty() || row_group_batches.back().total_bytes_compressed >= min_bytes_for_seek) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index c102dbee0f4..7fdf03a0606 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -56,8 +56,6 @@ public: ~ParquetBlockInputFormat() override; - void setQueryInfo(const SelectQueryInfo & query_info, ContextPtr context) override; - void resetParser() override; String getName() const override { return "ParquetBlockInputFormat"; } @@ -255,9 +253,6 @@ private: std::shared_ptr metadata; /// Indices of columns to read from Parquet file. std::vector column_indices; - /// Pushed-down filter that we'll use to skip row groups. 
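For context on the pruning this hunk now gates behind format_settings.parquet.filter_push_down: checkInHyperrectangle runs the pushed-down KeyCondition against the per-row-group min/max statistics. A rough sketch with made-up values (the Int64 type and the Range constructor usage are assumptions for illustration; getHyperrectangleForRowGroup provides the real ranges):

    // Row group statistics say 1 <= x <= 100.
    std::vector<Range> hyperrectangle{Range(Field(1), true, Field(100), true)};
    DataTypes types{std::make_shared<DataTypeInt64>()};

    // For a pushed-down condition like "x > 1000" the result cannot be true,
    // so the whole row group is skipped without reading any of its pages.
    bool may_match = key_condition->checkInHyperrectangle(hyperrectangle, types).can_be_true;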
- std::optional key_condition; - // Window of active row groups: // diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 6fa891297f6..14648e68f94 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -134,8 +134,7 @@ void PrettyBlockOutputFormat::write(Chunk chunk, PortKind port_kind) { if (total_rows >= format_settings.pretty.max_rows) { - if (port_kind != PortKind::PartialResult) - total_rows += chunk.getNumRows(); + total_rows += chunk.getNumRows(); return; } if (mono_block) @@ -316,8 +315,7 @@ void PrettyBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port_kind } writeString(bottom_separator_s, out); - if (port_kind != PortKind::PartialResult) - total_rows += num_rows; + total_rows += num_rows; } @@ -390,34 +388,6 @@ void PrettyBlockOutputFormat::consumeExtremes(Chunk chunk) write(std::move(chunk), PortKind::Extremes); } -void PrettyBlockOutputFormat::clearLastLines(size_t lines_number) -{ - /// http://en.wikipedia.org/wiki/ANSI_escape_code - #define MOVE_TO_PREV_LINE "\033[A" - #define CLEAR_TO_END_OF_LINE "\033[K" - - static const char * clear_prev_line = MOVE_TO_PREV_LINE \ - CLEAR_TO_END_OF_LINE; - - /// Move cursor to the beginning of line - writeCString("\r", out); - - for (size_t line = 0; line < lines_number; ++line) - { - writeCString(clear_prev_line, out); - } -} - -void PrettyBlockOutputFormat::consumePartialResult(Chunk chunk) -{ - if (prev_partial_block_rows > 0) - /// number of rows + header line + footer line - clearLastLines(prev_partial_block_rows + 2); - - prev_partial_block_rows = chunk.getNumRows(); - write(std::move(chunk), PortKind::PartialResult); -} - void PrettyBlockOutputFormat::writeMonoChunkIfNeeded() { diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h index 92466dce3ff..dfb23ac63f9 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h @@ -28,12 +28,7 @@ protected: void consumeTotals(Chunk) override; void consumeExtremes(Chunk) override; - void clearLastLines(size_t lines_number); - void consumePartialResult(Chunk) override; - size_t total_rows = 0; - size_t prev_partial_block_rows = 0; - size_t row_number_width = 7; // "10000. 
" const FormatSettings format_settings; @@ -60,7 +55,6 @@ protected: void resetFormatterImpl() override { total_rows = 0; - prev_partial_block_rows = 0; } private: diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index 3a04d86b1ad..2ba9ec725e2 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -194,8 +194,7 @@ void PrettyCompactBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind po writeBottom(max_widths); - if (port_kind != PortKind::PartialResult) - total_rows += num_rows; + total_rows += num_rows; } diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index bbcfdbb7193..9cf609ed2d7 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -14,8 +14,8 @@ class LazyOutputFormat : public IOutputFormat { public: - explicit LazyOutputFormat(const Block & header, bool is_partial_result_protocol_active = false) - : IOutputFormat(header, out, is_partial_result_protocol_active), queue(2) {} + explicit LazyOutputFormat(const Block & header) + : IOutputFormat(header, out), queue(2) {} String getName() const override { return "LazyOutputFormat"; } @@ -49,7 +49,6 @@ protected: void consumeTotals(Chunk chunk) override { totals = std::move(chunk); } void consumeExtremes(Chunk chunk) override { extremes = std::move(chunk); } - void consumePartialResult(Chunk chunk) override { consume(std::move(chunk)); } private: diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 2f294a32531..8b160153733 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -40,10 +40,5 @@ std::string IProcessor::statusToName(Status status) UNREACHABLE(); } -ProcessorPtr IProcessor::getPartialResultProcessorPtr(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) -{ - return current_processor->getPartialResultProcessor(current_processor, partial_result_limit, partial_result_duration_ms); -} - } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 51a0bb1c121..c6bef186877 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -164,8 +164,6 @@ public: static std::string statusToName(Status status); - static ProcessorPtr getPartialResultProcessorPtr(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms); - /** Method 'prepare' is responsible for all cheap ("instantaneous": O(1) of data volume, no wait) calculations. * * It may access input and output ports, @@ -237,22 +235,6 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'expandPipeline' is not implemented for {} processor", getName()); } - enum class PartialResultStatus - { - /// Processor currently doesn't support work with the partial result pipeline. - NotSupported, - - /// Processor can be skipped in the partial result pipeline. - SkipSupported, - - /// Processor creates a light-weight copy of itself in the partial result pipeline. 
- /// The copy can create snapshots of the original processor or transform small blocks of data in the same way as the original processor - FullSupported, - }; - - virtual bool isPartialResultProcessor() const { return false; } - virtual PartialResultStatus getPartialResultProcessorSupportStatus() const { return PartialResultStatus::NotSupported; } - /// In case if query was cancelled executor will wait till all processors finish their jobs. /// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o). bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); } @@ -387,11 +369,6 @@ public: protected: virtual void onCancel() {} - virtual ProcessorPtr getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getPartialResultProcessor' is not implemented for {} processor", getName()); - } - private: /// For: /// - elapsed_us diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index b2bf3c28eee..5e24062d67a 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -1,5 +1,5 @@ #include -#include + namespace DB { @@ -180,6 +180,7 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) return Status::NeedData; data.current_chunk = input.pull(true); + auto rows = data.current_chunk.getNumRows(); if (rows_before_limit_at_least && !data.input_port_has_counter) @@ -366,11 +367,5 @@ bool LimitTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort return true; } -ProcessorPtr LimitTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) -{ - const auto & header = inputs.front().getHeader(); - return std::make_shared(header, partial_result_limit, partial_result_duration_ms, limit, offset); -} - } diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h index cfacc9634f9..33ff968985f 100644 --- a/src/Processors/LimitTransform.h +++ b/src/Processors/LimitTransform.h @@ -55,8 +55,6 @@ private: ColumnRawPtrs extractSortColumns(const Columns & columns) const; bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, UInt64 current_chunk_row_num) const; - ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; - public: LimitTransform( const Block & header_, UInt64 limit_, UInt64 offset_, size_t num_streams = 1, @@ -75,14 +73,6 @@ public: void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } void setInputPortHasCounter(size_t pos) { ports_data[pos].input_port_has_counter = true; } - - PartialResultStatus getPartialResultProcessorSupportStatus() const override - { - /// Currently LimitPartialResultTransform support only single-thread work. - bool is_partial_result_supported = inputs.size() == 1 && outputs.size() == 1; - - return is_partial_result_supported ? 
PartialResultStatus::FullSupported : PartialResultStatus::NotSupported; - } }; } diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp index cd911e4cdf4..fb3ed7f80fc 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp @@ -6,24 +6,10 @@ namespace DB { -namespace ErrorCodes -{ - extern const int FUNCTION_NOT_ALLOWED; -} - BuildQueryPipelineSettings BuildQueryPipelineSettings::fromContext(ContextPtr from) { BuildQueryPipelineSettings settings; - - const auto & context_settings = from->getSettingsRef(); - settings.partial_result_limit = context_settings.max_rows_in_partial_result; - settings.partial_result_duration_ms = context_settings.partial_result_update_duration_ms.totalMilliseconds(); - if (settings.partial_result_duration_ms && !context_settings.allow_experimental_partial_result) - throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, - "Partial results are not allowed by default, it's an experimental feature. " - "Setting 'allow_experimental_partial_result' must be enabled to use 'partial_result_update_duration_ms'"); - - settings.actions_settings = ExpressionActionsSettings::fromSettings(context_settings, CompileExpressions::yes); + settings.actions_settings = ExpressionActionsSettings::fromSettings(from->getSettingsRef(), CompileExpressions::yes); settings.process_list_element = from->getProcessListElement(); settings.progress_callback = from->getProgressCallback(); return settings; diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index 0410bf925d1..3b5e4e06953 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -19,9 +19,6 @@ struct BuildQueryPipelineSettings QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; - UInt64 partial_result_limit = 0; - UInt64 partial_result_duration_ms = 0; - const ExpressionActionsSettings & getActionsSettings() const { return actions_settings; } static BuildQueryPipelineSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index ec82c233ce4..2d2dc66a8c9 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -168,8 +168,6 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( QueryPipelineBuilderPtr last_pipeline; - bool has_partial_result_setting = build_pipeline_settings.partial_result_duration_ms > 0; - std::stack stack; stack.push(Frame{.node = root}); @@ -196,9 +194,6 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline( } else stack.push(Frame{.node = frame.node->children[next_child]}); - - if (has_partial_result_setting && last_pipeline && !last_pipeline->isPartialResultActive()) - last_pipeline->activatePartialResult(build_pipeline_settings.partial_result_limit, build_pipeline_settings.partial_result_duration_ms); } last_pipeline->setProgressCallback(build_pipeline_settings.progress_callback); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 05f39f72880..80fcc317d61 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -115,6 +115,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .save_marks_in_cache = true, .checksum_on_read = 
settings.checksum_on_read, .read_in_order = query_info.input_order_info != nullptr, + .apply_deleted_mask = settings.apply_deleted_mask, .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), .enable_multiple_prewhere_read_steps = settings.enable_multiple_prewhere_read_steps, diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index a24c4dbe4d0..798073f94d3 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,11 +1,13 @@ +#include #include +#include #include namespace DB { ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_) - : ISourceStep(DataStream{.header = pipe_.getHeader()}) + : SourceStepWithFilter(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) , context(std::move(context_)) , qualified_projection_name(std::move(qualified_projection_name_)) @@ -23,4 +25,24 @@ void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, pipeline.init(std::move(pipe)); } +void ReadFromStorageStep::applyFilters() +{ + if (!context) + return; + + std::shared_ptr key_condition; + if (!context->getSettingsRef().allow_experimental_analyzer) + { + for (const auto & processor : pipe.getProcessors()) + if (auto * source = dynamic_cast(processor.get())) + source->setKeyCondition(query_info, context); + } + else + { + for (const auto & processor : pipe.getProcessors()) + if (auto * source = dynamic_cast(processor.get())) + source->setKeyCondition(filter_nodes.nodes, context); + } +} + } diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 2606f501009..16e790273ea 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -2,13 +2,15 @@ #include #include +#include #include +#include namespace DB { /// Create source from prepared pipe. 
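applyFilters() above hands the filter to every SourceWithKeyCondition in the pipe, covering both the old and the new analyzer path. A hedged sketch (the class name is hypothetical) of how a format source is expected to hook into that interface, using the setKeyConditionImpl helpers introduced in SourceWithKeyCondition.h later in this diff:

    class ExampleFormatSource : public SourceWithKeyCondition     // hypothetical, for illustration only
    {
    public:
        explicit ExampleFormatSource(const Block & header) : SourceWithKeyCondition(header) {}

        String getName() const override { return "ExampleFormatSource"; }

        /// Old analyzer: build the KeyCondition from SelectQueryInfo.
        void setKeyCondition(const SelectQueryInfo & query_info, ContextPtr context) override
        {
            setKeyConditionImpl(query_info, context, getPort().getHeader());
        }

        /// New analyzer: build the KeyCondition from the filter ActionsDAG nodes.
        void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context) override
        {
            setKeyConditionImpl(nodes, context, getPort().getHeader());
        }

    protected:
        Chunk generate() override { return {}; }   // a real source would consult key_condition while reading
    };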
-class ReadFromPreparedSource : public ISourceStep +class ReadFromPreparedSource : public SourceStepWithFilter { public: explicit ReadFromPreparedSource( @@ -27,19 +29,21 @@ protected: class ReadFromStorageStep : public ReadFromPreparedSource { public: - ReadFromStorageStep(Pipe pipe_, String storage_name, std::shared_ptr storage_limits_) - : ReadFromPreparedSource(std::move(pipe_)), storage_limits(std::move(storage_limits_)) + ReadFromStorageStep(Pipe pipe_, String storage_name, const SelectQueryInfo & query_info_, ContextPtr context_) + : ReadFromPreparedSource(std::move(pipe_), std::move(context_)), query_info(query_info_) { setStepDescription(storage_name); for (const auto & processor : pipe.getProcessors()) - processor->setStorageLimits(storage_limits); + processor->setStorageLimits(query_info.storage_limits); } String getName() const override { return "ReadFromStorage"; } + void applyFilters() override; + private: - std::shared_ptr storage_limits; + SelectQueryInfo query_info; }; } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index a72cab05754..371a24ac6f2 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -27,8 +27,6 @@ public: size_t max_bytes_before_external_sort = 0; TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; - UInt64 partial_result_limit = 0; - UInt64 partial_result_duration_ms = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h new file mode 100644 index 00000000000..d4b2d01c520 --- /dev/null +++ b/src/Processors/SourceWithKeyCondition.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Source with KeyCondition to push down filters. +class SourceWithKeyCondition : public ISource +{ +protected: + /// Represents pushed down filters in source + std::shared_ptr key_condition; + + void setKeyConditionImpl(const SelectQueryInfo & query_info, ContextPtr context, const Block & keys) + { + if (!context->getSettingsRef().allow_experimental_analyzer) + { + key_condition = std::make_shared( + query_info, + context, + keys.getNames(), + std::make_shared(std::make_shared(keys.getColumnsWithTypeAndName()))); + } + } + + void setKeyConditionImpl(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context, const Block & keys) + { + if (context->getSettingsRef().allow_experimental_analyzer) + { + std::unordered_map node_name_to_input_column; + for (const auto & column : keys.getColumnsWithTypeAndName()) + node_name_to_input_column.insert({column.name, column}); + + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context); + key_condition = std::make_shared( + filter_actions_dag, + context, + keys.getNames(), + std::make_shared(std::make_shared(keys.getColumnsWithTypeAndName())), + NameSet{}); + } + } + +public: + using Base = ISource; + using Base::Base; + + /// Set key_condition directly. It is used for filter push down in source. + virtual void setKeyCondition(const std::shared_ptr & key_condition_) { key_condition = key_condition_; } + + /// Set key_condition created by query_info and context. It is used for filter push down when allow_experimental_analyzer is false. + virtual void setKeyCondition(const SelectQueryInfo & /*query_info*/, ContextPtr /*context*/) { } + + /// Set key_condition created by nodes and context. 
It is used for filter push down when allow_experimental_analyzer is true. + virtual void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & /*nodes*/, ContextPtr /*context*/) { } +}; +} diff --git a/src/Processors/Transforms/AggregatingPartialResultTransform.cpp b/src/Processors/Transforms/AggregatingPartialResultTransform.cpp deleted file mode 100644 index cf8ce72e096..00000000000 --- a/src/Processors/Transforms/AggregatingPartialResultTransform.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include - -namespace DB -{ - -AggregatingPartialResultTransform::AggregatingPartialResultTransform( - const Block & input_header, const Block & output_header, AggregatingTransformPtr aggregating_transform_, - UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) - : PartialResultTransform(input_header, output_header, partial_result_limit_, partial_result_duration_ms_) - , aggregating_transform(std::move(aggregating_transform_)) - , transform_aggregator(input_header, aggregating_transform->params->params) - {} - -void AggregatingPartialResultTransform::transformPartialResult(Chunk & chunk) -{ - auto & params = aggregating_transform->params->params; - - bool no_more_keys = false; - AggregatedDataVariants variants; - ColumnRawPtrs key_columns(params.keys_size); - Aggregator::AggregateColumns aggregate_columns(params.aggregates_size); - - const UInt64 num_rows = chunk.getNumRows(); - transform_aggregator.executeOnBlock(chunk.detachColumns(), 0, num_rows, variants, key_columns, aggregate_columns, no_more_keys); - - auto transformed_block = transform_aggregator.convertToBlocks(variants, /*final*/ true, /*max_threads*/ 1).front(); - - chunk = convertToChunk(transformed_block); -} - -PartialResultTransform::ShaphotResult AggregatingPartialResultTransform::getRealProcessorSnapshot() -{ - std::lock_guard lock(aggregating_transform->snapshot_mutex); - if (aggregating_transform->is_generate_initialized) - return {{}, SnaphotStatus::Stopped}; - - if (aggregating_transform->variants.empty()) - return {{}, SnaphotStatus::NotReady}; - - auto & snapshot_aggregator = aggregating_transform->params->aggregator; - auto & snapshot_variants = aggregating_transform->many_data->variants; - auto block = snapshot_aggregator.prepareBlockAndFillWithoutKeySnapshot(*snapshot_variants.at(0)); - - return {convertToChunk(block), SnaphotStatus::Ready}; -} - -} diff --git a/src/Processors/Transforms/AggregatingPartialResultTransform.h b/src/Processors/Transforms/AggregatingPartialResultTransform.h deleted file mode 100644 index f7bac3a5394..00000000000 --- a/src/Processors/Transforms/AggregatingPartialResultTransform.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -class AggregatingPartialResultTransform : public PartialResultTransform -{ -public: - using AggregatingTransformPtr = std::shared_ptr; - - AggregatingPartialResultTransform( - const Block & input_header, const Block & output_header, AggregatingTransformPtr aggregating_transform_, - UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); - - String getName() const override { return "AggregatingPartialResultTransform"; } - - void transformPartialResult(Chunk & chunk) override; - ShaphotResult getRealProcessorSnapshot() override; - -private: - AggregatingTransformPtr aggregating_transform; - Aggregator transform_aggregator; -}; - -} diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 9546d396523..bf475c57d36 100644 --- 
a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -10,6 +9,7 @@ #include + namespace ProfileEvents { extern const Event ExternalAggregationMerge; @@ -660,8 +660,6 @@ void AggregatingTransform::consume(Chunk chunk) src_rows += num_rows; src_bytes += chunk.bytes(); - std::lock_guard lock(snapshot_mutex); - if (params->params.only_merge) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -681,10 +679,6 @@ void AggregatingTransform::initGenerate() if (is_generate_initialized.load(std::memory_order_acquire)) return; - std::lock_guard lock(snapshot_mutex); - if (is_generate_initialized.load(std::memory_order_relaxed)) - return; - is_generate_initialized.store(true, std::memory_order_release); /// If there was no data, and we aggregate without keys, and we must return single row with the result of empty aggregation. @@ -815,12 +809,4 @@ void AggregatingTransform::initGenerate() } } -ProcessorPtr AggregatingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) -{ - const auto & input_header = inputs.front().getHeader(); - const auto & output_header = outputs.front().getHeader(); - auto aggregating_processor = std::dynamic_pointer_cast(current_processor); - return std::make_shared(input_header, output_header, std::move(aggregating_processor), partial_result_limit, partial_result_duration_ms); -} - } diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 7b13b1a34f6..3420cdeaa50 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -170,23 +170,9 @@ public: void work() override; Processors expandPipeline() override; - PartialResultStatus getPartialResultProcessorSupportStatus() const override - { - /// Currently AggregatingPartialResultTransform support only single-thread aggregation without key. - - /// TODO: check that insert results from aggregator.prepareBlockAndFillWithoutKey return values without - /// changing of the aggregator state when aggregation with keys will be supported in AggregatingPartialResultTransform. - bool is_partial_result_supported = params->params.keys_size == 0 /// Aggregation without key. - && many_data->variants.size() == 1; /// Use only one stream for aggregation. - - return is_partial_result_supported ? PartialResultStatus::FullSupported : PartialResultStatus::NotSupported; - } - protected: void consume(Chunk chunk); - ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; - private: /// To read the data that was flushed into the temporary data file. Processors processors; @@ -226,13 +212,6 @@ private: bool is_consume_started = false; - friend class AggregatingPartialResultTransform; - /// The mutex protects variables that are used for creating a snapshot of the current processor. - /// The current implementation of AggregatingPartialResultTransform uses the 'is_generate_initialized' variable to check - /// whether the processor has started sending data through the main pipeline, and the corresponding partial result processor should stop creating snapshots. - /// Additionally, the mutex protects the 'params->aggregator' and 'many_data->variants' variables, which are used to get data from them for a snapshot. 
- std::mutex snapshot_mutex; - void initGenerate(); }; diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index bd92267a733..0d3341b000c 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -25,14 +25,6 @@ void ExpressionTransform::transform(Chunk & chunk) chunk.setColumns(block.getColumns(), num_rows); } -ProcessorPtr ExpressionTransform::getPartialResultProcessor(const ProcessorPtr & /*current_processor*/, UInt64 /*partial_result_limit*/, UInt64 /*partial_result_duration_ms*/) -{ - const auto & header = getInputPort().getHeader(); - auto result = std::make_shared(header, expression); - result->setDescription("(Partial result)"); - return result; -} - ConvertingTransform::ConvertingTransform(const Block & header_, ExpressionActionsPtr expression_) : ExceptionKeepingTransform(header_, ExpressionTransform::transformHeader(header_, expression_->getActionsDAG())) , expression(std::move(expression_)) diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 8250f25f0f8..791c7d7ba73 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -26,15 +26,10 @@ public: static Block transformHeader(Block header, const ActionsDAG & expression); - PartialResultStatus getPartialResultProcessorSupportStatus() const override { return PartialResultStatus::FullSupported; } - protected: void transform(Chunk & chunk) override; - ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; - private: - ExpressionActionsPtr expression; }; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 4ef17a831bc..9577f7ca7ff 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -54,13 +54,14 @@ template static FillColumnDescription::StepFunction getStepFunction( IntervalKind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale) { + static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); switch (kind) // NOLINT(bugprone-switch-missing-default-case) { #define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ return [step, scale, &date_lut](Field & field) { \ field = Add##NAME##sImpl::execute(static_cast(\ - field.get()), static_cast(step), date_lut, scale); }; + field.get()), static_cast(step), date_lut, utc_time_zone, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE @@ -154,6 +155,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & { const auto & step_dec = descr.fill_step.get &>(); Int64 step = DecimalUtils::convertTo(step_dec.getValue(), step_dec.getScale()); + static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); switch (*descr.step_kind) // NOLINT(bugprone-switch-missing-default-case) { @@ -162,7 +164,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ auto field_decimal = field.get>(); \ - auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \ + auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, utc_time_zone, field_decimal.getScale()); \ 
field = DecimalField(res, field_decimal.getScale()); \ }; \ break; diff --git a/src/Processors/Transforms/LimitPartialResultTransform.cpp b/src/Processors/Transforms/LimitPartialResultTransform.cpp deleted file mode 100644 index c9eaa9dc7dd..00000000000 --- a/src/Processors/Transforms/LimitPartialResultTransform.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include - -namespace DB -{ - -LimitPartialResultTransform::LimitPartialResultTransform( - const Block & header, - UInt64 partial_result_limit_, - UInt64 partial_result_duration_ms_, - UInt64 limit_, - UInt64 offset_) - : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_) - , limit(limit_) - , offset(offset_) - {} - -void LimitPartialResultTransform::transformPartialResult(Chunk & chunk) -{ - UInt64 num_rows = chunk.getNumRows(); - if (num_rows < offset || limit == 0) - { - chunk = {}; - return; - } - - UInt64 length = std::min(limit, num_rows - offset); - - /// Check if some rows should be removed - if (length < num_rows) - { - UInt64 num_columns = chunk.getNumColumns(); - auto columns = chunk.detachColumns(); - - for (UInt64 i = 0; i < num_columns; ++i) - columns[i] = columns[i]->cut(offset, length); - - chunk.setColumns(std::move(columns), length); - } -} - -} diff --git a/src/Processors/Transforms/LimitPartialResultTransform.h b/src/Processors/Transforms/LimitPartialResultTransform.h deleted file mode 100644 index 3a0116b624d..00000000000 --- a/src/Processors/Transforms/LimitPartialResultTransform.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class LimitTransform; - -/// Currently support only single thread implementation with one input and one output ports -class LimitPartialResultTransform : public PartialResultTransform -{ -public: - using LimitTransformPtr = std::shared_ptr; - - LimitPartialResultTransform( - const Block & header, - UInt64 partial_result_limit_, - UInt64 partial_result_duration_ms_, - UInt64 limit_, - UInt64 offset_); - - String getName() const override { return "LimitPartialResultTransform"; } - - void transformPartialResult(Chunk & chunk) override; - /// LimitsTransform doesn't have a state which can be snapshoted - ShaphotResult getRealProcessorSnapshot() override { return {{}, SnaphotStatus::Stopped}; } - -private: - UInt64 limit; - UInt64 offset; - - LimitTransformPtr limit_transform; -}; - -} diff --git a/src/Processors/Transforms/LimitsCheckingTransform.cpp b/src/Processors/Transforms/LimitsCheckingTransform.cpp index 0557f3f291e..02d2fef808c 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace DB diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index eabb988dab6..2f96a17c17b 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -33,8 +33,6 @@ public: void setQuota(const std::shared_ptr & quota_) { quota = quota_; } - PartialResultStatus getPartialResultProcessorSupportStatus() const override { return PartialResultStatus::SkipSupported; } - protected: void transform(Chunk & chunk) override; diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp b/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp deleted file mode 100644 index 44b34ce3f58..00000000000 --- a/src/Processors/Transforms/MergeSortingPartialResultTransform.cpp +++ /dev/null @@ 
-1,54 +0,0 @@ -#include - -namespace DB -{ - -MergeSortingPartialResultTransform::MergeSortingPartialResultTransform( - const Block & header, MergeSortingTransformPtr merge_sorting_transform_, - UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) - : PartialResultTransform(header, partial_result_limit_, partial_result_duration_ms_) - , merge_sorting_transform(std::move(merge_sorting_transform_)) - {} - -PartialResultTransform::ShaphotResult MergeSortingPartialResultTransform::getRealProcessorSnapshot() -{ - std::lock_guard lock(merge_sorting_transform->snapshot_mutex); - if (merge_sorting_transform->generated_prefix) - return {{}, SnaphotStatus::Stopped}; - - if (merge_sorting_transform->chunks.empty()) - return {{}, SnaphotStatus::NotReady}; - - /// Sort all input data - merge_sorting_transform->remerge(); - - /// It's possible that we had only empty chunks before remerge - if (merge_sorting_transform->chunks.empty()) - return {{}, SnaphotStatus::NotReady}; - - /// Add a copy of the first `partial_result_limit` rows to a generated_chunk - /// to send it later as a partial result in the next prepare stage of the current processor - auto generated_columns = merge_sorting_transform->chunks[0].cloneEmptyColumns(); - - size_t total_rows = 0; - for (const auto & merged_chunk : merge_sorting_transform->chunks) - { - size_t rows = std::min(merged_chunk.getNumRows(), partial_result_limit - total_rows); - if (rows == 0) - break; - - for (size_t position = 0; position < generated_columns.size(); ++position) - { - auto column = merged_chunk.getColumns()[position]; - generated_columns[position]->insertRangeFrom(*column, 0, rows); - } - - total_rows += rows; - } - - auto partial_result = Chunk(std::move(generated_columns), total_rows, merge_sorting_transform->chunks[0].getChunkInfo()); - merge_sorting_transform->enrichChunkWithConstants(partial_result); - return {std::move(partial_result), SnaphotStatus::Ready}; -} - -} diff --git a/src/Processors/Transforms/MergeSortingPartialResultTransform.h b/src/Processors/Transforms/MergeSortingPartialResultTransform.h deleted file mode 100644 index 781aa8e1265..00000000000 --- a/src/Processors/Transforms/MergeSortingPartialResultTransform.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class MergeSortingPartialResultTransform : public PartialResultTransform -{ -public: - using MergeSortingTransformPtr = std::shared_ptr; - - MergeSortingPartialResultTransform( - const Block & header, MergeSortingTransformPtr merge_sorting_transform_, - UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); - - String getName() const override { return "MergeSortingPartialResultTransform"; } - - /// MergeSortingTransform always receives chunks in a sorted state, so transformation is not needed - void transformPartialResult(Chunk & /*chunk*/) override {} - ShaphotResult getRealProcessorSnapshot() override; - -private: - MergeSortingTransformPtr merge_sorting_transform; -}; - -} diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index e801e5e16d5..de77711d129 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -137,8 +136,6 @@ void MergeSortingTransform::consume(Chunk chunk) /// If there were only const columns in sort description, then there is no need to sort. /// Return the chunk as is. 
- std::lock_guard lock(snapshot_mutex); - if (description.empty()) { generated_chunk = std::move(chunk); @@ -216,8 +213,6 @@ void MergeSortingTransform::serialize() void MergeSortingTransform::generate() { - std::lock_guard lock(snapshot_mutex); - if (!generated_prefix) { size_t num_tmp_files = tmp_data ? tmp_data->getStreams().size() : 0; @@ -278,11 +273,4 @@ void MergeSortingTransform::remerge() sum_bytes_in_blocks = new_sum_bytes_in_blocks; } -ProcessorPtr MergeSortingTransform::getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) -{ - const auto & header = inputs.front().getHeader(); - auto merge_sorting_processor = std::dynamic_pointer_cast(current_processor); - return std::make_shared(header, std::move(merge_sorting_processor), partial_result_limit, partial_result_duration_ms); -} - } diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h index 67f098b4362..e8c180b6903 100644 --- a/src/Processors/Transforms/MergeSortingTransform.h +++ b/src/Processors/Transforms/MergeSortingTransform.h @@ -33,8 +33,6 @@ public: String getName() const override { return "MergeSortingTransform"; } - PartialResultStatus getPartialResultProcessorSupportStatus() const override { return PartialResultStatus::FullSupported; } - protected: void consume(Chunk chunk) override; void serialize() override; @@ -42,8 +40,6 @@ protected: Processors expandPipeline() override; - ProcessorPtr getPartialResultProcessor(const ProcessorPtr & current_processor, UInt64 partial_result_limit, UInt64 partial_result_duration_ms) override; - private: size_t max_bytes_before_remerge; double remerge_lowered_memory_bytes_ratio; @@ -63,13 +59,6 @@ private: void remerge(); ProcessorPtr external_merging_sorted; - - friend class MergeSortingPartialResultTransform; - /// The mutex protects variables that are used for creating a snapshot of the current processor. - /// The current implementation of MergeSortingPartialResultTransform uses the 'generated_prefix' variable to check - /// whether the processor has started sending data through the main pipeline, and the corresponding partial result processor should stop creating snapshots. - /// Additionally, the mutex protects the 'chunks' variable and all variables in the 'remerge' function, which is used to transition 'chunks' to a sorted state. 
- std::mutex snapshot_mutex; }; } diff --git a/src/Processors/Transforms/PartialResultTransform.cpp b/src/Processors/Transforms/PartialResultTransform.cpp deleted file mode 100644 index 97ff79dee54..00000000000 --- a/src/Processors/Transforms/PartialResultTransform.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include - -namespace DB -{ - - -PartialResultTransform::PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) - : PartialResultTransform(header, header, partial_result_limit_, partial_result_duration_ms_) {} - -PartialResultTransform::PartialResultTransform(const Block & input_header, const Block & output_header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) - : IProcessor({input_header}, {output_header}) - , input(inputs.front()) - , output(outputs.front()) - , partial_result_limit(partial_result_limit_) - , partial_result_duration_ms(partial_result_duration_ms_) - , watch(CLOCK_MONOTONIC) - {} - -IProcessor::Status PartialResultTransform::prepare() -{ - if (output.isFinished()) - { - input.close(); - return Status::Finished; - } - - if (finished_getting_snapshots) - { - output.finish(); - return Status::Finished; - } - - if (!output.canPush()) - { - input.setNotNeeded(); - return Status::PortFull; - } - - /// If input data from previous partial result processor is finished then - /// PartialResultTransform ready to create snapshots and send them as a partial result - if (input.isFinished()) - { - if (partial_result.snapshot_status == SnaphotStatus::Ready) - { - partial_result.snapshot_status = SnaphotStatus::NotReady; - output.push(std::move(partial_result.chunk)); - return Status::PortFull; - } - - return Status::Ready; - } - - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - - partial_result.chunk = input.pull(); - transformPartialResult(partial_result.chunk); - if (partial_result.chunk.getNumRows() > 0) - { - output.push(std::move(partial_result.chunk)); - return Status::PortFull; - } - - return Status::NeedData; -} - -void PartialResultTransform::work() -{ - if (partial_result_duration_ms < watch.elapsedMilliseconds()) - { - partial_result = getRealProcessorSnapshot(); - if (partial_result.snapshot_status == SnaphotStatus::Stopped) - finished_getting_snapshots = true; - - watch.restart(); - } -} - -} diff --git a/src/Processors/Transforms/PartialResultTransform.h b/src/Processors/Transforms/PartialResultTransform.h deleted file mode 100644 index 4fe87638f38..00000000000 --- a/src/Processors/Transforms/PartialResultTransform.h +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/// Processors of this type are used to construct an auxiliary pipeline with processors corresponding to those in the main pipeline. -/// These processors work in two modes: -/// 1) Creating a snapshot of the corresponding processor from the main pipeline once per partial_result_duration_ms (period in milliseconds), and then sending the snapshot through the partial result pipeline. -/// 2) Transforming small blocks of data in the same way as the original processor and sending the transformed data through the partial result pipeline. -/// All processors of this type rely on the invariant that a new block from the previous processor of the partial result pipeline overwrites information about the previous block of the same previous processor. 
-class PartialResultTransform : public IProcessor -{ -public: - PartialResultTransform(const Block & header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); - PartialResultTransform(const Block & input_header, const Block & output_header, UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); - - String getName() const override { return "PartialResultTransform"; } - - Status prepare() override; - void work() override; - - bool isPartialResultProcessor() const override { return true; } - -protected: - enum class SnaphotStatus - { - NotReady, // Waiting for data from the previous partial result processor or awaiting a timer before creating the snapshot. - Ready, // Current partial result processor has received a snapshot from the processor in the main pipeline. - Stopped, // The processor from the main pipeline has started sending data, and the pipeline for partial results should use data from the next processors of the main pipeline. - }; - - struct ShaphotResult - { - Chunk chunk; - SnaphotStatus snapshot_status; - }; - - InputPort & input; - OutputPort & output; - - UInt64 partial_result_limit; - UInt64 partial_result_duration_ms; - - ShaphotResult partial_result = {{}, SnaphotStatus::NotReady}; - - bool finished_getting_snapshots = false; - - virtual void transformPartialResult(Chunk & /*chunk*/) = 0; - virtual ShaphotResult getRealProcessorSnapshot() = 0; // { return {{}, SnaphotStatus::Stopped}; } - -private: - Stopwatch watch; -}; - -} diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index be76971ddcd..9565a073f48 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1066,7 +1066,7 @@ void WindowTransform::appendChunk(Chunk & chunk) auto columns = chunk.detachColumns(); block.original_input_columns = columns; for (auto & column : columns) - column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); + column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()->convertToFullColumnIfSparse()); block.input_columns = std::move(columns); // Initialize output columns. 
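The WindowTransform.cpp hunk above additionally expands sparse columns to their full representation before window processing. The sketch below only illustrates the general idea of densifying a sparse column so that every row can be addressed directly; the map-based representation and the convertToFullColumn helper are hypothetical simplifications and do not reflect ClickHouse's ColumnSparse/IColumn API.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

/// Simplified stand-in for a sparse column: only non-default values are stored,
/// keyed by their row offset. Expanding to a dense vector lets row-oriented code
/// (such as window-function frames) index any row without special-casing sparsity.
std::vector<int64_t> convertToFullColumn(
    const std::unordered_map<size_t, int64_t> & non_default_values,
    size_t num_rows,
    int64_t default_value = 0)
{
    std::vector<int64_t> full(num_rows, default_value);
    for (const auto & [offset, value] : non_default_values)
        full[offset] = value;   /// place explicit values at their row offsets
    return full;
}

int main()
{
    /// Rows 2 and 5 carry non-default values, the remaining rows are implicit defaults.
    const auto full = convertToFullColumn({{2, 42}, {5, 7}}, 8);
    for (const auto v : full)
        std::cout << v << ' ';  /// prints: 0 0 42 0 0 7 0 0
    std::cout << '\n';
    return 0;
}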
diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index b22c135e865..b1c82d7a7e8 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include @@ -170,9 +169,12 @@ Pipe::Pipe(ProcessorPtr source) { checkSource(*source); + if (collected_processors) + collected_processors->emplace_back(source); + output_ports.push_back(&source->getOutputs().front()); header = output_ports.front()->getHeader(); - addProcessor(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = 1; } @@ -313,18 +315,6 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow for (auto & pipe : pipes) { - if (res.isPartialResultActive() && pipe.isPartialResultActive()) - { - res.partial_result_ports.insert(res.partial_result_ports.end(), pipe.partial_result_ports.begin(), pipe.partial_result_ports.end()); - } - else - { - if (pipe.isPartialResultActive()) - pipe.dropPartialResult(); - if (res.isPartialResultActive()) - res.dropPartialResult(); - } - if (!allow_empty_header || pipe.header) assertCompatibleHeader(pipe.header, res.header, "Pipe::unitePipes"); @@ -364,11 +354,11 @@ void Pipe::addSource(ProcessorPtr source) else assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - output_ports.push_back(&source->getOutputs().front()); - if (isPartialResultActive()) - partial_result_ports.push_back(nullptr); + if (collected_processors) + collected_processors->emplace_back(source); - addProcessor(std::move(source)); + output_ports.push_back(&source->getOutputs().front()); + processors->emplace_back(std::move(source)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -386,9 +376,11 @@ void Pipe::addTotalsSource(ProcessorPtr source) assertBlocksHaveEqualStructure(header, source_header, "Pipes"); - totals_port = &source->getOutputs().front(); + if (collected_processors) + collected_processors->emplace_back(source); - addProcessor(std::move(source)); + totals_port = &source->getOutputs().front(); + processors->emplace_back(std::move(source)); } void Pipe::addExtremesSource(ProcessorPtr source) @@ -404,20 +396,11 @@ void Pipe::addExtremesSource(ProcessorPtr source) assertBlocksHaveEqualStructure(header, source_header, "Pipes"); + if (collected_processors) + collected_processors->emplace_back(source); + extremes_port = &source->getOutputs().front(); - - addProcessor(std::move(source)); -} - -void Pipe::activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_) -{ - if (is_partial_result_active) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Partial result for Pipe should be initialized only once"); - - is_partial_result_active = true; - partial_result_limit = partial_result_limit_; - partial_result_duration_ms = partial_result_duration_ms_; - partial_result_ports.assign(output_ports.size(), nullptr); + processors->emplace_back(std::move(source)); } static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) @@ -445,15 +428,6 @@ void Pipe::dropExtremes() dropPort(extremes_port, *processors, collected_processors); } -void Pipe::dropPartialResult() -{ - for (auto & port : partial_result_ports) - dropPort(port, *processors, collected_processors); - - is_partial_result_active = false; - partial_result_ports.clear(); -} - void Pipe::addTransform(ProcessorPtr transform) { addTransform(std::move(transform), static_cast(nullptr), static_cast(nullptr)); @@ -484,8 +458,6 @@ void 
Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes) extremes_port = extremes; - addPartialResultTransform(transform); - size_t next_output = 0; for (auto & input : inputs) { @@ -536,7 +508,10 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (extremes_port) assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes"); - addProcessor(std::move(transform)); + if (collected_processors) + collected_processors->emplace_back(transform); + + processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -573,8 +548,6 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes_port = nullptr; } - addPartialResultTransform(transform); - bool found_totals = false; bool found_extremes = false; @@ -624,119 +597,14 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * if (extremes_port) assertBlocksHaveEqualStructure(header, extremes_port->getHeader(), "Pipes"); - addProcessor(std::move(transform)); + if (collected_processors) + collected_processors->emplace_back(transform); + + processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } -void Pipe::addPartialResultSimpleTransform(const ProcessorPtr & transform, size_t partial_result_port_id) -{ - if (isPartialResultActive()) - { - auto & partial_result_port = partial_result_ports[partial_result_port_id]; - auto partial_result_status = transform->getPartialResultProcessorSupportStatus(); - - if (partial_result_status == IProcessor::PartialResultStatus::NotSupported) - dropPort(partial_result_port, *processors, collected_processors); - - if (partial_result_status != IProcessor::PartialResultStatus::FullSupported) - return; - - auto partial_result_transform = IProcessor::getPartialResultProcessorPtr(transform, partial_result_limit, partial_result_duration_ms); - - connectPartialResultPort(partial_result_port, partial_result_transform->getInputs().front()); - - partial_result_port = &partial_result_transform->getOutputs().front(); - - addProcessor(std::move(partial_result_transform)); - } -} - -void Pipe::addPartialResultTransform(const ProcessorPtr & transform) -{ - if (isPartialResultActive()) - { - size_t new_outputs_size = 0; - for (const auto & output : transform->getOutputs()) - { - /// We do not use totals_port and extremes_port in partial result - if ((totals_port && totals_port == &output) || (extremes_port && extremes_port == &output)) - continue; - ++new_outputs_size; - } - - auto partial_result_status = transform->getPartialResultProcessorSupportStatus(); - - if (partial_result_status == IProcessor::PartialResultStatus::SkipSupported && new_outputs_size != partial_result_ports.size()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot skip transform {} in the partial result part of the Pipe because it has {} output ports, but the partial result part expects {} output ports", - transform->getName(), - new_outputs_size, - partial_result_ports.size()); - - if (partial_result_status == IProcessor::PartialResultStatus::NotSupported) - { - for (auto & partial_result_port : partial_result_ports) - dropPort(partial_result_port, *processors, collected_processors); - - partial_result_ports.assign(new_outputs_size, nullptr); - return; - } - - if (partial_result_status != IProcessor::PartialResultStatus::FullSupported) - return; - - auto partial_result_transform = 
IProcessor::getPartialResultProcessorPtr(transform, partial_result_limit, partial_result_duration_ms); - auto & inputs = partial_result_transform->getInputs(); - - if (inputs.size() != partial_result_ports.size()) - { - WriteBufferFromOwnString out; - if (processors && !processors->empty()) - printPipeline(*processors, out); - - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot add partial result transform {} to Pipe because it has {} input ports, but {} expected\n{}", - partial_result_transform->getName(), - inputs.size(), - partial_result_ports.size(), out.str()); - } - - size_t next_port = 0; - for (auto & input : inputs) - { - connectPartialResultPort(partial_result_ports[next_port], input); - ++next_port; - } - - partial_result_ports.assign(new_outputs_size, nullptr); - - next_port = 0; - for (auto & new_partial_result_port : partial_result_transform->getOutputs()) - { - partial_result_ports[next_port] = &new_partial_result_port; - ++next_port; - } - - addProcessor(std::move(partial_result_transform)); - } -} - -void Pipe::connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port) -{ - if (partial_result_port == nullptr) - { - auto source = std::make_shared(getHeader()); - partial_result_port = &source->getPort(); - - addProcessor(std::move(source)); - } - - connect(*partial_result_port, partial_result_transform_port); -} - void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) @@ -744,7 +612,7 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) Block new_header; - auto add_transform = [&](OutputPort *& port, size_t partial_result_port_id, StreamType stream_type) + auto add_transform = [&](OutputPort *& port, StreamType stream_type) { if (!port) return; @@ -780,22 +648,19 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { connect(*port, transform->getInputs().front()); port = &transform->getOutputs().front(); - if (stream_type == StreamType::Main) - addPartialResultSimpleTransform(transform, partial_result_port_id); - addProcessor(std::move(transform)); + if (collected_processors) + collected_processors->emplace_back(transform); + + processors->emplace_back(std::move(transform)); } }; - size_t partial_result_port_id = 0; for (auto & port : output_ports) - { - add_transform(port, partial_result_port_id, StreamType::Main); - ++partial_result_port_id; - } + add_transform(port, StreamType::Main); - add_transform(totals_port, 0, StreamType::Totals); - add_transform(extremes_port, 0, StreamType::Extremes); + add_transform(totals_port, StreamType::Totals); + add_transform(extremes_port, StreamType::Extremes); header = std::move(new_header); } @@ -816,7 +681,6 @@ void Pipe::addChains(std::vector chains) dropTotals(); dropExtremes(); - dropPartialResult(); size_t max_parallel_streams_for_chains = 0; @@ -835,21 +699,18 @@ void Pipe::addChains(std::vector chains) auto added_processors = Chain::getProcessors(std::move(chains[i])); for (auto & transform : added_processors) - addProcessor(std::move(transform)); + { + if (collected_processors) + collected_processors->emplace_back(transform); + + processors->emplace_back(std::move(transform)); + } } header = std::move(new_header); max_parallel_streams = std::max(max_parallel_streams, max_parallel_streams_for_chains); } -void Pipe::addProcessor(ProcessorPtr processor) -{ - if (collected_processors) - collected_processors->emplace_back(processor); - - processors->emplace_back(std::move(processor)); 
-} - void Pipe::resize(size_t num_streams, bool force, bool strict) { if (output_ports.empty()) @@ -910,9 +771,6 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) add_transform(totals_port, StreamType::Totals); add_transform(extremes_port, StreamType::Extremes); - for (auto & port : partial_result_ports) - add_transform(port, StreamType::PartialResult); - output_ports.clear(); header.clear(); } @@ -922,9 +780,6 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) if (output_ports.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot transform empty Pipe"); - /// TODO: Add functionality to work with partial result ports in transformer. - dropPartialResult(); - auto new_processors = transformer(output_ports); /// Create hash table with new processors. @@ -1014,10 +869,5 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } -OutputPort * Pipe::getPartialResultPort(size_t pos) const -{ - return partial_result_ports.empty() ? nullptr : partial_result_ports[pos]; -} - } diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index a6bd46a325b..09931e38578 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -48,9 +48,6 @@ public: OutputPort * getOutputPort(size_t pos) const { return output_ports[pos]; } OutputPort * getTotalsPort() const { return totals_port; } OutputPort * getExtremesPort() const { return extremes_port; } - OutputPort * getPartialResultPort(size_t pos) const; - - bool isPartialResultActive() { return is_partial_result_active; } /// Add processor to list, add it output ports to output_ports. /// Processor shouldn't have input ports, output ports shouldn't be connected. @@ -61,13 +58,9 @@ public: void addTotalsSource(ProcessorPtr source); void addExtremesSource(ProcessorPtr source); - /// Activate sending partial result during main pipeline execution - void activatePartialResult(UInt64 partial_result_limit_, UInt64 partial_result_duration_ms_); - - /// Drop totals, extremes and partial result (create NullSink for them). + /// Drop totals and extremes (create NullSink for them). void dropTotals(); void dropExtremes(); - void dropPartialResult(); /// Add processor to list. It should have size() input ports with compatible header. /// Output ports should have same headers. @@ -76,16 +69,11 @@ public: void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes); void addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes); - void addPartialResultTransform(const ProcessorPtr & transform); - void addPartialResultSimpleTransform(const ProcessorPtr & transform, size_t partial_result_port_id); - void connectPartialResultPort(OutputPort * partial_result_port, InputPort & partial_result_transform_port); - enum class StreamType { Main = 0, /// Stream for query data. There may be several streams of this type. Totals, /// Stream for totals. No more than one. Extremes, /// Stream for extremes. No more than one. - PartialResult, /// Stream for partial result data. There may be several streams of this type. 
}; using ProcessorGetter = std::function; @@ -121,17 +109,10 @@ private: Block header; std::shared_ptr processors; - /// If the variable is true, then each time a processor is added pipe will try - /// to add processor which will send partial result from original processor - bool is_partial_result_active = false; - UInt64 partial_result_limit = 0; - UInt64 partial_result_duration_ms = 0; - - /// Output ports. Totals, extremes and partial results are allowed to be empty. + /// Output ports. Totals and extremes are allowed to be empty. OutputPortRawPtrs output_ports; OutputPort * totals_port = nullptr; OutputPort * extremes_port = nullptr; - OutputPortRawPtrs partial_result_ports; /// It is the max number of processors which can be executed in parallel for each step. /// Usually, it's the same as the number of output ports. @@ -147,8 +128,6 @@ private: static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header); void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); - void addProcessor(ProcessorPtr processor); - friend class QueryPipelineBuilder; friend class QueryPipeline; }; diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 4ce0aa029be..935c006c217 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -73,8 +73,7 @@ static void checkPulling( Processors & processors, OutputPort * output, OutputPort * totals, - OutputPort * extremes, - OutputPort * partial_result) + OutputPort * extremes) { if (!output || output->isConnected()) throw Exception( @@ -91,15 +90,9 @@ static void checkPulling( ErrorCodes::LOGICAL_ERROR, "Cannot create pulling QueryPipeline because its extremes port is connected"); - if (partial_result && partial_result->isConnected()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot create pulling QueryPipeline because its partial_result port is connected"); - bool found_output = false; bool found_totals = false; bool found_extremes = false; - bool found_partial_result = false; for (const auto & processor : processors) { for (const auto & in : processor->getInputs()) @@ -113,8 +106,6 @@ static void checkPulling( found_totals = true; else if (extremes && &out == extremes) found_extremes = true; - else if (partial_result && &out == partial_result) - found_partial_result = true; else checkOutput(out, processor, processors); } @@ -132,10 +123,6 @@ static void checkPulling( throw Exception( ErrorCodes::LOGICAL_ERROR, "Cannot create pulling QueryPipeline because its extremes port does not belong to any processor"); - if (partial_result && !found_partial_result) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot create pulling QueryPipeline because its partial result port does not belong to any processor"); } static void checkCompleted(Processors & processors) @@ -338,20 +325,17 @@ QueryPipeline::QueryPipeline( std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_, - OutputPort * extremes_, - OutputPort * partial_result_) + OutputPort * extremes_) : resources(std::move(resources_)) , processors(std::move(processors_)) , output(output_) , totals(totals_) , extremes(extremes_) - , partial_result(partial_result_) { - checkPulling(*processors, output, totals, extremes, partial_result); + checkPulling(*processors, output, totals, extremes); } QueryPipeline::QueryPipeline(Pipe pipe) - : partial_result_duration_ms(pipe.partial_result_duration_ms) { if (pipe.numOutputPorts() > 0) { @@ -359,11 +343,8 @@ 
QueryPipeline::QueryPipeline(Pipe pipe) output = pipe.getOutputPort(0); totals = pipe.getTotalsPort(); extremes = pipe.getExtremesPort(); - partial_result = pipe.getPartialResultPort(0); - num_threads = pipe.max_parallel_streams; - processors = std::move(pipe.processors); - checkPulling(*processors, output, totals, extremes, partial_result); + checkPulling(*processors, output, totals, extremes); } else { @@ -395,7 +376,6 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); - auto & format_partial_result = format->getPort(IOutputFormat::PortKind::PartialResult); if (!totals) { @@ -411,21 +391,12 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) processors->emplace_back(std::move(source)); } - if (!partial_result) - { - auto source = std::make_shared(format_partial_result.getHeader()); - partial_result = &source->getPort(); - processors->emplace_back(std::move(source)); - } - connect(*totals, format_totals); connect(*extremes, format_extremes); - connect(*partial_result, format_partial_result); input = &format_main; totals = nullptr; extremes = nullptr; - partial_result = nullptr; output_format = format.get(); @@ -453,7 +424,6 @@ void QueryPipeline::complete(std::shared_ptr sink) drop(totals, *processors); drop(extremes, *processors); - drop(partial_result, *processors); connect(*output, sink->getPort()); processors->emplace_back(std::move(sink)); @@ -469,7 +439,6 @@ void QueryPipeline::complete(Chain chain) drop(totals, *processors); drop(extremes, *processors); - drop(partial_result, *processors); processors->reserve(processors->size() + chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) @@ -495,7 +464,6 @@ void QueryPipeline::complete(Pipe pipe) pipe.resize(1); pipe.dropExtremes(); pipe.dropTotals(); - pipe.dropPartialResult(); connect(*pipe.getOutputPort(0), *input); input = nullptr; @@ -524,13 +492,11 @@ void QueryPipeline::complete(std::shared_ptr format) addMaterializing(output, *processors); addMaterializing(totals, *processors); addMaterializing(extremes, *processors); - addMaterializing(partial_result, *processors); } auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); auto & format_extremes = format->getPort(IOutputFormat::PortKind::Extremes); - auto & format_partial_result = format->getPort(IOutputFormat::PortKind::PartialResult); if (!totals) { @@ -546,22 +512,13 @@ void QueryPipeline::complete(std::shared_ptr format) processors->emplace_back(std::move(source)); } - if (!partial_result) - { - auto source = std::make_shared(format_partial_result.getHeader()); - partial_result = &source->getPort(); - processors->emplace_back(std::move(source)); - } - connect(*output, format_main); connect(*totals, format_totals); connect(*extremes, format_extremes); - connect(*partial_result, format_partial_result); output = nullptr; totals = nullptr; extremes = nullptr; - partial_result = nullptr; initRowsBeforeLimit(format.get()); output_format = format.get(); @@ -733,7 +690,6 @@ void QueryPipeline::convertStructureTo(const ColumnsWithTypeAndName & columns) addExpression(output, actions, *processors); addExpression(totals, actions, *processors); addExpression(extremes, actions, *processors); - addExpression(partial_result, actions, *processors); } 
std::unique_ptr QueryPipeline::getReadProgressCallback() const diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 20e58bc0f59..f14cf61aac2 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -75,8 +75,7 @@ public: std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_ = nullptr, - OutputPort * extremes_ = nullptr, - OutputPort * partial_result_ = nullptr); + OutputPort * extremes_ = nullptr); bool initialized() const { return !processors->empty(); } /// When initialized, exactly one of the following is true. @@ -155,7 +154,6 @@ private: OutputPort * output = nullptr; OutputPort * totals = nullptr; OutputPort * extremes = nullptr; - OutputPort * partial_result = nullptr; QueryStatusPtr process_list_element; @@ -164,9 +162,6 @@ private: size_t num_threads = 0; bool concurrency_control = false; - UInt64 partial_result_limit = 0; - UInt64 partial_result_duration_ms = 0; - friend class PushingPipelineExecutor; friend class PullingPipelineExecutor; friend class PushingAsyncPipelineExecutor; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index e176e8585f5..f9726339872 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -110,15 +110,6 @@ void QueryPipelineBuilder::init(QueryPipeline & pipeline) pipe.header = {}; } - if (pipeline.partial_result) - { - /// Set partial result ports only after activation because when activated, it is set to nullptr - pipe.activatePartialResult(pipeline.partial_result_limit, pipeline.partial_result_duration_ms); - pipe.partial_result_ports = {pipeline.partial_result}; - } - else - pipe.dropPartialResult(); - pipe.totals_port = pipeline.totals; pipe.extremes_port = pipeline.extremes; pipe.max_parallel_streams = pipeline.num_threads; @@ -361,10 +352,6 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); - /// TODO: Support joining of partial results from different pipelines. - left->pipe.dropPartialResult(); - right->pipe.dropPartialResult(); - left->pipe.dropExtremes(); right->pipe.dropExtremes(); if (left->getNumStreams() != 1 || right->getNumStreams() != 1) @@ -377,7 +364,6 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped auto joining = std::make_shared(join, inputs, out_header, max_block_size); - /// TODO: Support partial results in merge pipelines after joining support above. return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors); } @@ -398,10 +384,6 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe left->pipe.dropExtremes(); right->pipe.dropExtremes(); - /// TODO: Support joining of partial results from different pipelines. - left->pipe.dropPartialResult(); - right->pipe.dropPartialResult(); - left->pipe.collected_processors = collected_processors; /// Collect the NEW processors for the right pipeline. 
@@ -652,7 +634,7 @@ PipelineExecutorPtr QueryPipelineBuilder::execute() if (!isCompleted()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute pipeline because it is not completed"); - return std::make_shared(pipe.processors, process_list_element, pipe.partial_result_duration_ms); + return std::make_shared(pipe.processors, process_list_element); } Pipe QueryPipelineBuilder::getPipe(QueryPipelineBuilder pipeline, QueryPlanResourceHolder & resources) diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index cee545ac29d..5d273df7068 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -85,15 +85,6 @@ public: /// Pipeline will be completed after this transformation. void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter); - /// Activate building separate pipeline for sending partial result. - void activatePartialResult(UInt64 partial_result_limit, UInt64 partial_result_duration_ms) - { - pipe.activatePartialResult(partial_result_limit, partial_result_duration_ms); - } - - /// Check if building of a pipeline for sending partial result active. - bool isPartialResultActive() { return pipe.isPartialResultActive(); } - /// Add totals which returns one chunk with single row with defaults. void addDefaultTotals(); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 7b88e8f3841..1aec67a7f81 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1193,6 +1193,16 @@ bool PredefinedQueryHandler::customizeQueryParam(ContextMutablePtr context, cons return true; } + if (startsWith(key, QUERY_PARAMETER_NAME_PREFIX)) + { + /// Save name and values of substitution in dictionary. + const String parameter_name = key.substr(strlen(QUERY_PARAMETER_NAME_PREFIX)); + + if (receive_params.contains(parameter_name)) + context->setQueryParameter(parameter_name, value); + return true; + } + return false; } diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index f660bafe23f..e1ee9586f83 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -180,7 +180,23 @@ void addDefaultHandlersFactory( return std::make_unique(server, "query"); }; auto query_handler = std::make_shared>(std::move(dynamic_creator)); - query_handler->allowPostAndGetParamsAndOptionsRequest(); + query_handler->addFilter([](const auto & request) + { + bool path_matches_get_or_head = startsWith(request.getURI(), "?") + || startsWith(request.getURI(), "/?") + || startsWith(request.getURI(), "/query?"); + bool is_get_or_head_request = request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET + || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD; + + bool path_matches_post_or_options = path_matches_get_or_head + || request.getURI() == "/" + || request.getURI().empty(); + bool is_post_or_options_request = request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST + || request.getMethod() == Poco::Net::HTTPRequest::HTTP_OPTIONS; + + return (path_matches_get_or_head && is_get_or_head_request) || (path_matches_post_or_options && is_post_or_options_request); + } + ); factory.addHandler(query_handler); /// We check that prometheus handler will be served on current (default) port. 
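The addFilter lambda in the HTTPHandlerFactory.cpp hunk above accepts GET/HEAD only for query-style URIs, and POST/OPTIONS additionally for the root or empty path. Below is a minimal sketch of that predicate-based filtering, assuming C++20 for std::string::starts_with; Request and Handler here are simplified stand-ins, not the Poco/ClickHouse classes used in the diff.

#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Request
{
    std::string method;
    std::string uri;
};

/// A handler matches a request only if every registered filter predicate accepts it.
struct Handler
{
    std::vector<std::function<bool(const Request &)>> filters;

    void addFilter(std::function<bool(const Request &)> f) { filters.push_back(std::move(f)); }

    bool matches(const Request & request) const
    {
        for (const auto & filter : filters)
            if (!filter(request))
                return false;
        return true;
    }
};

int main()
{
    Handler query_handler;
    query_handler.addFilter([](const Request & request)
    {
        bool get_like_path = request.uri.starts_with("?")
            || request.uri.starts_with("/?")
            || request.uri.starts_with("/query?");
        bool is_get_or_head = request.method == "GET" || request.method == "HEAD";

        bool post_like_path = get_like_path || request.uri == "/" || request.uri.empty();
        bool is_post_or_options = request.method == "POST" || request.method == "OPTIONS";

        return (get_like_path && is_get_or_head) || (post_like_path && is_post_or_options);
    });

    std::cout << query_handler.matches({"GET", "/?query=SELECT%201"}) << '\n';  /// 1
    std::cout << query_handler.matches({"GET", "/custom"}) << '\n';             /// 0
    return 0;
}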
diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 6e952c2fd9d..94c1ad5b6f2 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -98,7 +98,7 @@ static inline auto headersFilter(const Poco::Util::AbstractConfiguration & confi { for (const auto & [header_name, header_expression] : headers_expression) { - const auto & header_value = request.get(header_name, ""); + const auto header_value = request.get(header_name, ""); if (!checkExpression(std::string_view(header_value.data(), header_value.size()), header_expression)) return false; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 4908bf82b46..871606c6298 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -10,8 +10,6 @@ #include #include #include -#include -#include #include #include #include @@ -106,7 +104,6 @@ namespace DB::ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; - extern const int FUNCTION_NOT_ALLOWED; } namespace @@ -965,14 +962,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors() std::unique_lock progress_lock(task_callback_mutex, std::defer_lock); { - const auto & settings = query_context->getSettingsRef(); - bool has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0; - if (has_partial_result_setting && !settings.allow_experimental_partial_result) - throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, - "Partial results are not allowed by default, it's an experimental feature. " - "Setting 'allow_experimental_partial_result' must be enabled to use 'partial_result_update_duration_ms'"); - - PullingAsyncPipelineExecutor executor(pipeline, has_partial_result_setting); + PullingAsyncPipelineExecutor executor(pipeline); CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread}; Block block; diff --git a/src/Server/waitServersToFinish.cpp b/src/Server/waitServersToFinish.cpp index 3b07c082067..d6eb5b59b88 100644 --- a/src/Server/waitServersToFinish.cpp +++ b/src/Server/waitServersToFinish.cpp @@ -16,7 +16,7 @@ size_t waitServersToFinish(std::vector & servers, std current_connections = 0; { - std::scoped_lock lock{mutex}; + std::lock_guard lock{mutex}; for (auto & server : servers) { server.stop(); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 3ade4474b6b..c6fa17583b5 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -845,6 +845,12 @@ bool AlterCommand::isRemovingProperty() const return to_remove != RemoveProperty::NO_PROPERTY; } +bool AlterCommand::isDropSomething() const +{ + return type == Type::DROP_COLUMN || type == Type::DROP_INDEX + || type == Type::DROP_CONSTRAINT || type == Type::DROP_PROJECTION; +} + std::optional AlterCommand::tryConvertToMutationCommand(StorageInMemoryMetadata & metadata, ContextPtr context) const { if (!isRequireMutationStage(metadata)) diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index c06872f9757..74a11d2beaa 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -167,6 +167,8 @@ struct AlterCommand /// Command removing some property from column or table bool isRemovingProperty() const; + bool isDropSomething() const; + /// If possible, convert alter command to mutation command. In other case /// return empty optional. Some storages may execute mutations after /// metadata changes. 
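Regarding the HTTPHandlerRequestFilter.h hunk above: taking header_value by value rather than by reference sidesteps a potential dangling reference when the getter has to fall back to its default. The exact Poco semantics may differ, but the general hazard looks like the hypothetical getOrDefault sketch below, where the getter returns a reference to its default-value parameter.

#include <iostream>
#include <map>
#include <string>

/// May return a reference to the temporary std::string bound to `def`,
/// which is destroyed at the end of the caller's full expression.
const std::string & getOrDefault(const std::map<std::string, std::string> & m,
                                 const std::string & key,
                                 const std::string & def = "")
{
    auto it = m.find(key);
    return it == m.end() ? def : it->second;
}

int main()
{
    std::map<std::string, std::string> headers{{"Host", "localhost"}};

    /// Unsafe: `missing` could dangle, because the temporary built for `def`
    /// dies as soon as this statement finishes.
    /// const auto & missing = getOrDefault(headers, "X-Custom");

    /// Safe: copy the value while the temporary is still alive.
    const auto missing = getOrDefault(headers, "X-Custom");
    std::cout << '[' << missing << "]\n";   /// prints: []
    return 0;
}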
diff --git a/src/Storages/CheckResults.h b/src/Storages/CheckResults.h index b342b014fa4..2e4652fea29 100644 --- a/src/Storages/CheckResults.h +++ b/src/Storages/CheckResults.h @@ -22,6 +22,4 @@ struct CheckResult {} }; -using CheckResults = std::vector; - } diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index 7c481d196d2..77a22cd00fc 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -27,6 +27,12 @@ public: , base_configuration(configuration_) , log(&Poco::Logger::get(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + template + static StoragePtr create(const Configuration & configuration_, ContextPtr context_, Args && ...args) + { + return std::make_shared>(configuration_, context_, std::forward(args)...); + } + String getName() const override { return name; } static ColumnsDescription getTableStructureFromData( @@ -109,8 +115,7 @@ static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) if (configuration.format == "auto") configuration.format = "Parquet"; - return std::make_shared( - configuration, args.getContext(), args.table_id, args.columns, args.constraints, + return DataLake::create(configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, getFormatSettings(args.getContext())); } diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp new file mode 100644 index 00000000000..2b8b7082515 --- /dev/null +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -0,0 +1,580 @@ +#include "config.h" + +#if USE_AWS_S3 && USE_AVRO + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; + extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; + extern const int UNSUPPORTED_METHOD; +} + +IcebergMetadata::IcebergMetadata( + const StorageS3::Configuration & configuration_, + DB::ContextPtr context_, + DB::Int32 metadata_version_, + DB::Int32 format_version_, + DB::String manifest_list_file_, + DB::Int32 current_schema_id_, + DB::NamesAndTypesList schema_) + : WithContext(context_) + , configuration(configuration_) + , metadata_version(metadata_version_) + , format_version(format_version_) + , manifest_list_file(std::move(manifest_list_file_)) + , current_schema_id(current_schema_id_) + , schema(std::move(schema_)) + , log(&Poco::Logger::get("IcebergMetadata")) +{ +} + +namespace +{ + +enum class ManifestEntryStatus +{ + EXISTING = 0, + ADDED = 1, + DELETED = 2, +}; + +enum class DataFileContent +{ + DATA = 0, + POSITION_DELETES = 1, + EQUALITY_DELETES = 2, +}; + +/** + * Iceberg supports the next data types (see https://iceberg.apache.org/spec/#schemas-and-data-types): + * - Primitive types: + * - boolean + * - int + * - long + * - float + * - double + * - decimal(P, S) + * - date + * - time (time of day in microseconds since midnight) + * - timestamp (in microseconds since 1970-01-01) + * - timestamptz (timestamp with timezone, stores values in UTC timezone) + * - string + * - uuid + * - fixed(L) (fixed-length byte array of length L) + * - binary + * - Complex types: + * - struct(field1: Type1, field2: Type2, ...) 
(tuple of typed values) + * - list(nested_type) + * - map(Key, Value) + * + * Example of table schema in metadata: + * { + * "type" : "struct", + * "schema-id" : 0, + * "fields" : [ + * { + * "id" : 1, + * "name" : "id", + * "required" : false, + * "type" : "long" + * }, + * { + * "id" : 2, + * "name" : "array", + * "required" : false, + * "type" : { + * "type" : "list", + * "element-id" : 5, + * "element" : "int", + * "element-required" : false + * }, + * { + * "id" : 3, + * "name" : "data", + * "required" : false, + * "type" : "binary" + * } + * } + */ + +DataTypePtr getSimpleTypeByName(const String & type_name) +{ + if (type_name == "boolean") + return DataTypeFactory::instance().get("Bool"); + if (type_name == "int") + return std::make_shared(); + if (type_name == "long") + return std::make_shared(); + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "date") + return std::make_shared(); + /// Time type represents time of the day in microseconds since midnight. + /// We don't have similar type for it, let's use just Int64. + if (type_name == "time") + return std::make_shared(); + if (type_name == "timestamp") + return std::make_shared(6); + if (type_name == "timestamptz") + return std::make_shared(6, "UTC"); + if (type_name == "string" || type_name == "binary") + return std::make_shared(); + if (type_name == "uuid") + return std::make_shared(); + + if (type_name.starts_with("fixed[") && type_name.ends_with(']')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 6, type_name.end() - 1)); + size_t n; + readIntText(n, buf); + return std::make_shared(n); + } + + if (type_name.starts_with("decimal(") && type_name.ends_with(')')) + { + ReadBufferFromString buf(std::string_view(type_name.begin() + 8, type_name.end() - 1)); + size_t precision; + size_t scale; + readIntText(precision, buf); + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + tryReadIntText(scale, buf); + return createDecimal(precision, scale); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown Iceberg type: {}", type_name); +} + +DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool required); + +DataTypePtr getComplexTypeFromObject(const Poco::JSON::Object::Ptr & type) +{ + String type_name = type->getValue("type"); + if (type_name == "list") + { + bool element_required = type->getValue("element-required"); + auto element_type = getFieldType(type, "element", element_required); + return std::make_shared(element_type); + } + + if (type_name == "map") + { + auto key_type = getFieldType(type, "key", true); + auto value_required = type->getValue("value-required"); + auto value_type = getFieldType(type, "value", value_required); + return std::make_shared(key_type, value_type); + } + + if (type_name == "struct") + { + DataTypes element_types; + Names element_names; + auto fields = type->get("fields").extract(); + element_types.reserve(fields->size()); + element_names.reserve(fields->size()); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + element_names.push_back(field->getValue("name")); + auto required = field->getValue("required"); + element_types.push_back(getFieldType(field, "type", required)); + } + + return std::make_shared(element_types, element_names); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown Iceberg type: {}", type_name); +} + +DataTypePtr getFieldType(const 
Poco::JSON::Object::Ptr & field, const String & type_key, bool required) +{ + if (field->isObject(type_key)) + return getComplexTypeFromObject(field->getObject(type_key)); + + auto type = field->get(type_key); + if (type.isString()) + { + const String & type_name = type.extract(); + auto data_type = getSimpleTypeByName(type_name); + return required ? data_type : makeNullable(data_type); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'type' field: {}", type.toString()); + +} + +std::pair parseTableSchema(const Poco::JSON::Object::Ptr & metadata_object, int format_version) +{ + Poco::JSON::Object::Ptr schema; + Int32 current_schema_id; + + /// First check if schema was evolved, because we don't support it yet. + /// For version 2 we can check it by using field schemas, but in version 1 + /// this field is optional and we will check it later during parsing manifest files + /// (we will compare schema id from manifest file and currently used schema). + if (format_version == 2) + { + current_schema_id = metadata_object->getValue("current-schema-id"); + auto schemas = metadata_object->get("schemas").extract(); + if (schemas->size() != 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not supported"); + + /// Now we sure that there is only one schema. + schema = schemas->getObject(0); + if (schema->getValue("schema-id") != current_schema_id) + throw Exception(ErrorCodes::BAD_ARGUMENTS, R"(Field "schema-id" of the schema doesn't match "current-schema-id" in metadata)"); + } + else + { + schema = metadata_object->getObject("schema"); + current_schema_id = schema->getValue("schema-id"); + /// Field "schemas" is optional for version 1, but after version 2 was introduced, + /// in most cases this field is added for new tables in version 1 as well. + if (metadata_object->has("schemas") && metadata_object->get("schemas").extract()->size() > 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not supported"); + } + + NamesAndTypesList names_and_types; + auto fields = schema->get("fields").extract(); + for (size_t i = 0; i != fields->size(); ++i) + { + auto field = fields->getObject(static_cast(i)); + auto name = field->getValue("name"); + bool required = field->getValue("required"); + names_and_types.push_back({name, getFieldType(field, "type", required)}); + } + + return {std::move(names_and_types), current_schema_id}; +} + +MutableColumns parseAvro( + avro::DataFileReaderBase & file_reader, + const Block & header, + const FormatSettings & settings) +{ + auto deserializer = std::make_unique(header, file_reader.dataSchema(), true, true, settings); + MutableColumns columns = header.cloneEmptyColumns(); + + file_reader.init(); + RowReadExtension ext; + while (file_reader.hasMore()) + { + file_reader.decr(); + deserializer->deserializeRow(columns, file_reader.decoder(), ext); + } + return columns; +} + +/** + * Each version of table metadata is stored in a `metadata` directory and + * has format: v.metadata.json, where V - metadata version. 
+ */ +std::pair getMetadataFileAndVersion(const StorageS3::Configuration & configuration) +{ + const auto metadata_files = S3DataLakeMetadataReadHelper::listFiles(configuration, "metadata", ".metadata.json"); + if (metadata_files.empty()) + { + throw Exception( + ErrorCodes::FILE_DOESNT_EXIST, + "The metadata file for Iceberg table with path {} doesn't exist", + configuration.url.key); + } + + std::vector> metadata_files_with_versions; + metadata_files_with_versions.reserve(metadata_files.size()); + for (const auto & path : metadata_files) + { + String file_name(path.begin() + path.find_last_of('/') + 1, path.end()); + String version_str(file_name.begin() + 1, file_name.begin() + file_name.find_first_of('.')); + if (!std::all_of(version_str.begin(), version_str.end(), isdigit)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad metadata file name: {}. Expected vN.metadata.json where N is a number", file_name); + metadata_files_with_versions.emplace_back(std::stoi(version_str), path); + } + + /// Get the latest version of metadata file: v.metadata.json + return *std::max_element(metadata_files_with_versions.begin(), metadata_files_with_versions.end()); +} + +} + +std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) +{ + const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); + LOG_DEBUG(&Poco::Logger::get("IcebergMetadata"), "Parse metadata {}", metadata_file_path); + auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); + String json_str; + readJSONObjectPossiblyInvalid(json_str, *buf); + + Poco::JSON::Parser parser; /// For some reason base/base/JSON.h can not parse this json file + Poco::Dynamic::Var json = parser.parse(json_str); + Poco::JSON::Object::Ptr object = json.extract(); + + auto format_version = object->getValue("format-version"); + auto [schema, schema_id] = parseTableSchema(object, format_version); + + auto current_snapshot_id = object->getValue("current-snapshot-id"); + auto snapshots = object->get("snapshots").extract(); + + String manifest_list_file; + for (size_t i = 0; i < snapshots->size(); ++i) + { + const auto snapshot = snapshots->getObject(static_cast(i)); + if (snapshot->getValue("snapshot-id") == current_snapshot_id) + { + const auto path = snapshot->getValue("manifest-list"); + manifest_list_file = std::filesystem::path(configuration.url.key) / "metadata" / std::filesystem::path(path).filename(); + break; + } + } + + return std::make_unique(configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); +} + +/** + * Manifest file has the following format: '/iceberg_data/db/table_name/metadata/c87bfec7-d36c-4075-ad04-600b6b0f2020-m0.avro' + * + * `manifest file` is different in format version V1 and V2 and has the following contents: + * v1 v2 + * status req req + * snapshot_id req opt + * sequence_number opt + * file_sequence_number opt + * data_file req req + * Example format version V1: + * ┌─status─┬─────────snapshot_id─┬─data_file───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ + * │ 1 │ 2819310504515118887 │ 
('/iceberg_data/db/table_name/data/00000-1-3edca534-15a0-4f74-8a28-4733e0bf1270-00001.parquet','PARQUET',(),100,1070,67108864,[(1,233),(2,210)],[(1,100),(2,100)],[(1,0),(2,0)],[],[(1,'\0'),(2,'0')],[(1,'c'),(2,'99')],NULL,[4],0) │ + * └────────┴─────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + * Example format version V2: + * ┌─status─┬─────────snapshot_id─┬─sequence_number─┬─file_sequence_number─┬─data_file───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ + * │ 1 │ 5887006101709926452 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ (0,'/iceberg_data/db/table_name/data/00000-1-c8045c90-8799-4eac-b957-79a0484e223c-00001.parquet','PARQUET',(),100,1070,[(1,233),(2,210)],[(1,100),(2,100)],[(1,0),(2,0)],[],[(1,'\0'),(2,'0')],[(1,'c'),(2,'99')],NULL,[4],[],0) │ + * └────────┴─────────────────────┴─────────────────┴──────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + * In case of partitioned data we'll have extra directory partition=value: + * ─status─┬─────────snapshot_id─┬─data_file──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ + * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=0/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00001.parquet','PARQUET',(0),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0\0'),(2,'1')],[(1,'\0\0\0\0\0\0\0\0'),(2,'1')],NULL,[4],0) │ + * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=1/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00002.parquet','PARQUET',(1),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'2')],[(1,'\0\0\0\0\0\0\0'),(2,'2')],NULL,[4],0) │ + * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=2/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00003.parquet','PARQUET',(2),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'3')],[(1,'\0\0\0\0\0\0\0'),(2,'3')],NULL,[4],0) │ + * └────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + */ +Strings IcebergMetadata::getDataFiles() +{ + if (!data_files.empty()) + return data_files; + + Strings manifest_files; + if (manifest_list_file.empty()) + return data_files; + + LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file); + + auto manifest_list_buf = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_list_file, getContext(), configuration); + auto manifest_list_file_reader = std::make_unique(std::make_unique(*manifest_list_buf)); + + auto data_type = 
AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0)); + Block header{{data_type->createColumn(), data_type, "manifest_path"}}; + auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(getContext())); + auto & col = columns.at(0); + + if (col->getDataType() != TypeIndex::String) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `manifest_path` field should be String type, got {}", + col->getFamilyName()); + } + + const auto * col_str = typeid_cast(col.get()); + for (size_t i = 0; i < col_str->size(); ++i) + { + const auto file_path = col_str->getDataAt(i).toView(); + const auto filename = std::filesystem::path(file_path).filename(); + manifest_files.emplace_back(std::filesystem::path(configuration.url.key) / "metadata" / filename); + } + + NameSet files; + LOG_TEST(log, "Collect data files"); + for (const auto & manifest_file : manifest_files) + { + LOG_TEST(log, "Process manifest file {}", manifest_file); + + auto buffer = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_file, getContext(), configuration); + auto manifest_file_reader = std::make_unique(std::make_unique(*buffer)); + + /// Manifest file should always have table schema in avro file metadata. By now we don't support tables with evolved schema, + /// so we should check if all manifest files have the same schema as in table metadata. + auto avro_metadata = manifest_file_reader->metadata(); + std::vector schema_json = avro_metadata["schema"]; + String schema_json_string = String(reinterpret_cast(schema_json.data()), schema_json.size()); + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(schema_json_string); + Poco::JSON::Object::Ptr schema_object = json.extract(); + if (schema_object->getValue("schema-id") != current_schema_id) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not supported"); + + avro::NodePtr root_node = manifest_file_reader->dataSchema().root(); + size_t leaves_num = root_node->leaves(); + size_t expected_min_num = format_version == 1 ? 3 : 2; + if (leaves_num < expected_min_num) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unexpected number of columns {}. 
Expected at least {}", + root_node->leaves(), expected_min_num); + } + + avro::NodePtr status_node = root_node->leafAt(0); + if (status_node->type() != avro::Type::AVRO_INT) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `status` field should be Int type, got {}", + magic_enum::enum_name(status_node->type())); + } + + avro::NodePtr data_file_node = root_node->leafAt(static_cast(leaves_num) - 1); + if (data_file_node->type() != avro::Type::AVRO_RECORD) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `data_file` field should be Tuple type, got {}", + magic_enum::enum_name(data_file_node->type())); + } + + auto status_col_data_type = AvroSchemaReader::avroNodeToDataType(status_node); + auto data_col_data_type = AvroSchemaReader::avroNodeToDataType(data_file_node); + Block manifest_file_header + = {{status_col_data_type->createColumn(), status_col_data_type, "status"}, + {data_col_data_type->createColumn(), data_col_data_type, "data_file"}}; + + columns = parseAvro(*manifest_file_reader, manifest_file_header, getFormatSettings(getContext())); + if (columns.size() != 2) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected number of columns. Expected 2, got {}", columns.size()); + + if (columns.at(0)->getDataType() != TypeIndex::Int32) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `status` field should be Int32 type, got {}", + columns.at(0)->getFamilyName()); + } + if (columns.at(1)->getDataType() != TypeIndex::Tuple) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `file_path` field should be Tuple type, got {}", + columns.at(1)->getFamilyName()); + } + + const auto * status_int_column = assert_cast(columns.at(0).get()); + const auto & data_file_tuple_type = assert_cast(*data_col_data_type.get()); + const auto * data_file_tuple_column = assert_cast(columns.at(1).get()); + + if (status_int_column->size() != data_file_tuple_column->size()) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `file_path` and `status` have different rows number: {} and {}", + status_int_column->size(), + data_file_tuple_column->size()); + } + + ColumnPtr file_path_column = data_file_tuple_column->getColumnPtr(data_file_tuple_type.getPositionByName("file_path")); + + if (file_path_column->getDataType() != TypeIndex::String) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `file_path` field should be String type, got {}", + file_path_column->getFamilyName()); + } + + const auto * file_path_string_column = assert_cast(file_path_column.get()); + + ColumnPtr content_column; + const ColumnInt32 * content_int_column = nullptr; + if (format_version == 2) + { + content_column = data_file_tuple_column->getColumnPtr(data_file_tuple_type.getPositionByName("content")); + if (content_column->getDataType() != TypeIndex::Int32) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "The parsed column from Avro file of `content` field should be Int type, got {}", + content_column->getFamilyName()); + } + + content_int_column = assert_cast(content_column.get()); + } + + for (size_t i = 0; i < data_file_tuple_column->size(); ++i) + { + if (format_version == 2) + { + Int32 content_type = content_int_column->getElement(i); + if (DataFileContent(content_type) != DataFileContent::DATA) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: positional and 
equality deletes are not supported"); + } + + const auto status = status_int_column->getInt(i); + const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); + const auto pos = data_path.find(configuration.url.key); + const auto file_path = data_path.substr(pos); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + + if (ManifestEntryStatus(status) == ManifestEntryStatus::DELETED) + { + LOG_TEST(log, "Processing delete file for path: {}", file_path); + chassert(!files.contains(file_path)); + } + else + { + LOG_TEST(log, "Processing data file for path: {}", file_path); + files.insert(file_path); + } + } + } + + data_files = std::vector(files.begin(), files.end()); + return data_files; +} + +} + +#endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h new file mode 100644 index 00000000000..d42ad84f472 --- /dev/null +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -0,0 +1,95 @@ +#pragma once + +#if USE_AWS_S3 && USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. + +#include +#include +#include + +namespace DB +{ + +/** + * Useful links: + * - https://iceberg.apache.org/spec/ + * + * Iceberg has two format versions, v1 and v2. The content of metadata files depends on the version. + * + * Unlike DeltaLake, Iceberg has several metadata layers: `table metadata`, `manifest list` and `manifest_files`. + * Metadata file - json file. + * Manifest list – an Avro file that lists manifest files; one per snapshot. + * Manifest file – an Avro file that lists data or delete files; a subset of a snapshot. + * All changes to table state create a new metadata file and replace the old metadata with an atomic swap. + * + * In order to find out which data files to read, we need to find the `manifest list` + * which corresponds to the latest snapshot. We find it by checking a list of snapshots + * in metadata's "snapshots" section. + * + * Example of metadata.json file. + * { + * "format-version" : 1, + * "table-uuid" : "ca2965ad-aae2-4813-8cf7-2c394e0c10f5", + * "location" : "/iceberg_data/db/table_name", + * "last-updated-ms" : 1680206743150, + * "last-column-id" : 2, + * "schema" : { "type" : "struct", "schema-id" : 0, "fields" : [ {}, {}, ... ] }, + * "current-schema-id" : 0, + * "schemas" : [ ], + * ... + * "current-snapshot-id" : 2819310504515118887, + * "refs" : { "main" : { "snapshot-id" : 2819310504515118887, "type" : "branch" } }, + * "snapshots" : [ { + * "snapshot-id" : 2819310504515118887, + * "timestamp-ms" : 1680206743150, + * "summary" : { + * "operation" : "append", "spark.app.id" : "local-1680206733239", + * "added-data-files" : "1", "added-records" : "100", + * "added-files-size" : "1070", "changed-partition-count" : "1", + * "total-records" : "100", "total-files-size" : "1070", "total-data-files" : "1", "total-delete-files" : "0", + * "total-position-deletes" : "0", "total-equality-deletes" : "0" + * }, + * "manifest-list" : "/iceberg_data/db/table_name/metadata/snap-2819310504515118887-1-c87bfec7-d36c-4075-ad04-600b6b0f2020.avro", + * "schema-id" : 0 + * } ], + * "statistics" : [ ], + * "snapshot-log" : [ ... 
], + * "metadata-log" : [ ] + * } + */ +class IcebergMetadata : WithContext +{ +public: + IcebergMetadata(const StorageS3::Configuration & configuration_, + ContextPtr context_, + Int32 metadata_version_, + Int32 format_version_, + String manifest_list_file_, + Int32 current_schema_id_, + NamesAndTypesList schema_); + + /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. + /// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) + Strings getDataFiles(); + + /// Get table schema parsed from metadata. + NamesAndTypesList getTableSchema() const { return schema; } + + size_t getVersion() const { return metadata_version; } + +private: + const StorageS3::Configuration configuration; + Int32 metadata_version; + Int32 format_version; + String manifest_list_file; + Int32 current_schema_id; + NamesAndTypesList schema; + Strings data_files; + Poco::Logger * log; + +}; + +std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context); + +} + +#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp new file mode 100644 index 00000000000..165ecce5142 --- /dev/null +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -0,0 +1,70 @@ +#include + +#if USE_AWS_S3 && USE_AVRO + +namespace DB +{ + +StoragePtr StorageIceberg::create( + const DB::StorageIceberg::Configuration & base_configuration, + DB::ContextPtr context_, + const DB::StorageID & table_id_, + const DB::ColumnsDescription & columns_, + const DB::ConstraintsDescription & constraints_, + const DB::String & comment, + std::optional format_settings_) +{ + auto configuration{base_configuration}; + configuration.update(context_); + auto metadata = parseIcebergMetadata(configuration, context_); + auto schema_from_metadata = metadata->getTableSchema(); + configuration.keys = metadata->getDataFiles(); + return std::make_shared(std::move(metadata), configuration, context_, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment, format_settings_); +} + +StorageIceberg::StorageIceberg( + std::unique_ptr metadata_, + const Configuration & configuration_, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_) + : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_) + , current_metadata(std::move(metadata_)) + , base_configuration(configuration_) +{ +} + +ColumnsDescription StorageIceberg::getTableStructureFromData( + Configuration & base_configuration, + const std::optional &, + ContextPtr local_context) +{ + auto configuration{base_configuration}; + configuration.update(local_context); + auto metadata = parseIcebergMetadata(configuration, local_context); + return ColumnsDescription(metadata->getTableSchema()); +} + +void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) +{ + const bool updated = base_configuration.update(local_context); + auto new_metadata = parseIcebergMetadata(base_configuration, local_context); + /// Check if nothing was changed. 
+ if (updated && new_metadata->getVersion() == current_metadata->getVersion()) + return; + + if (new_metadata->getVersion() != current_metadata->getVersion()) + current_metadata = std::move(new_metadata); + + auto updated_configuration{base_configuration}; + /// If metadata wasn't changed, we won't list data files again. + updated_configuration.keys = current_metadata->getDataFiles(); + StorageS3::useConfiguration(updated_configuration); +} + +} + +#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h new file mode 100644 index 00000000000..a18865b5a54 --- /dev/null +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -0,0 +1,84 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 && USE_AVRO + +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) +/// Right now it's implemented on top of StorageS3 and right now it doesn't support +/// many Iceberg features like schema evolution, partitioning, positional and equality deletes. +/// TODO: Implement Iceberg as a separate storage using IObjectStorage +/// (to support all object storages, not only S3) and add support for missing Iceberg features. +class StorageIceberg : public StorageS3 +{ +public: + static constexpr auto name = "Iceberg"; + + using Configuration = StorageS3::Configuration; + + static StoragePtr create(const Configuration & base_configuration, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_); + + StorageIceberg( + std::unique_ptr metadata_, + const Configuration & configuration_, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_); + + String getName() const override { return name; } + + static ColumnsDescription getTableStructureFromData( + Configuration & base_configuration, + const std::optional &, + ContextPtr local_context); + + static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + { + return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); + } + + Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + { + std::lock_guard lock(configuration_update_mutex); + updateConfigurationImpl(local_context); + return StorageS3::getConfiguration(); + } + + void updateConfiguration(ContextPtr local_context) override + { + std::lock_guard lock(configuration_update_mutex); + updateConfigurationImpl(local_context); + } + +private: + void updateConfigurationImpl(ContextPtr local_context); + + std::unique_ptr current_metadata; + Configuration base_configuration; + std::mutex configuration_update_mutex; +}; + +} + +#endif diff --git a/src/Storages/DataLakes/IcebergMetadataParser.cpp b/src/Storages/DataLakes/IcebergMetadataParser.cpp deleted file mode 100644 index c4194cd7814..00000000000 --- a/src/Storages/DataLakes/IcebergMetadataParser.cpp +++ /dev/null @@ -1,361 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int 
FILE_DOESNT_EXIST; - extern const int ILLEGAL_COLUMN; - extern const int BAD_ARGUMENTS; -} - -template -struct IcebergMetadataParser::Impl -{ - Poco::Logger * log = &Poco::Logger::get("IcebergMetadataParser"); - - /** - * Useful links: - * - https://iceberg.apache.org/spec/ - */ - - /** - * Iceberg has two format versions, currently we support only format V1. - * - * Unlike DeltaLake, Iceberg has several metadata layers: `table metadata`, `manifest list` and `manifest_files`. - * Metadata file - json file. - * Manifest list – a file that lists manifest files; one per snapshot. - * Manifest file – a file that lists data or delete files; a subset of a snapshot. - * All changes to table state create a new metadata file and replace the old metadata with an atomic swap. - */ - - static constexpr auto metadata_directory = "metadata"; - - /** - * Each version of table metadata is stored in a `metadata` directory and - * has format: v.metadata.json, where V - metadata version. - */ - String getMetadataFile(const Configuration & configuration) - { - static constexpr auto metadata_file_suffix = ".metadata.json"; - - const auto metadata_files = MetadataReadHelper::listFiles(configuration, metadata_directory, metadata_file_suffix); - if (metadata_files.empty()) - { - throw Exception( - ErrorCodes::FILE_DOESNT_EXIST, - "The metadata file for Iceberg table with path {} doesn't exist", - configuration.url.key); - } - - /// Get the latest version of metadata file: v.metadata.json - return *std::max_element(metadata_files.begin(), metadata_files.end()); - } - - /** - * In order to find out which data files to read, we need to find the `manifest list` - * which corresponds to the latest snapshot. We find it by checking a list of snapshots - * in metadata's "snapshots" section. - * - * Example of metadata.json file. - * - * { - * "format-version" : 1, - * "table-uuid" : "ca2965ad-aae2-4813-8cf7-2c394e0c10f5", - * "location" : "/iceberg_data/db/table_name", - * "last-updated-ms" : 1680206743150, - * "last-column-id" : 2, - * "schema" : { "type" : "struct", "schema-id" : 0, "fields" : [ {}, {}, ... ] }, - * "current-schema-id" : 0, - * "schemas" : [ ], - * ... - * "current-snapshot-id" : 2819310504515118887, - * "refs" : { "main" : { "snapshot-id" : 2819310504515118887, "type" : "branch" } }, - * "snapshots" : [ { - * "snapshot-id" : 2819310504515118887, - * "timestamp-ms" : 1680206743150, - * "summary" : { - * "operation" : "append", "spark.app.id" : "local-1680206733239", - * "added-data-files" : "1", "added-records" : "100", - * "added-files-size" : "1070", "changed-partition-count" : "1", - * "total-records" : "100", "total-files-size" : "1070", "total-data-files" : "1", "total-delete-files" : "0", - * "total-position-deletes" : "0", "total-equality-deletes" : "0" - * }, - * "manifest-list" : "/iceberg_data/db/table_name/metadata/snap-2819310504515118887-1-c87bfec7-d36c-4075-ad04-600b6b0f2020.avro", - * "schema-id" : 0 - * } ], - * "statistics" : [ ], - * "snapshot-log" : [ ... 
], - * "metadata-log" : [ ] - * } - */ - struct Metadata - { - int format_version; - String manifest_list; - Strings manifest_files; - }; - Metadata processMetadataFile(const Configuration & configuration, ContextPtr context) - { - const auto metadata_file_path = getMetadataFile(configuration); - auto buf = MetadataReadHelper::createReadBuffer(metadata_file_path, context, configuration); - String json_str; - readJSONObjectPossiblyInvalid(json_str, *buf); - - Poco::JSON::Parser parser; /// For some reason base/base/JSON.h can not parse this json file - Poco::Dynamic::Var json = parser.parse(json_str); - Poco::JSON::Object::Ptr object = json.extract(); - - Metadata result; - result.format_version = object->getValue("format-version"); - - auto current_snapshot_id = object->getValue("current-snapshot-id"); - auto snapshots = object->get("snapshots").extract(); - - for (size_t i = 0; i < snapshots->size(); ++i) - { - const auto snapshot = snapshots->getObject(static_cast(i)); - if (snapshot->getValue("snapshot-id") == current_snapshot_id) - { - const auto path = snapshot->getValue("manifest-list"); - result.manifest_list = std::filesystem::path(configuration.url.key) / metadata_directory / std::filesystem::path(path).filename(); - break; - } - } - return result; - } - - /** - * Manifest list has Avro as default format (and currently we support only Avro). - * Manifest list file format of manifest list is: snap-2819310504515118887-1-c87bfec7-d36c-4075-ad04-600b6b0f2020.avro - * - * `manifest list` has the following contents: - * ┌─manifest_path────────────────────────────────────────────────────────────────────────────────────────┬─manifest_length─┬─partition_spec_id─┬───added_snapshot_id─┬─added_data_files_count─┬─existing_data_files_count─┬─deleted_data_files_count─┬─partitions─┬─added_rows_count─┬─existing_rows_count─┬─deleted_rows_count─┐ - * │ /iceberg_data/db/table_name/metadata/c87bfec7-d36c-4075-ad04-600b6b0f2020-m0.avro │ 5813 │ 0 │ 2819310504515118887 │ 1 │ 0 │ 0 │ [] │ 100 │ 0 │ 0 │ - * └──────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┴───────────────────┴─────────────────────┴────────────────────────┴───────────────────────────┴──────────────────────────┴────────────┴──────────────────┴─────────────────────┴────────────────────┘ - */ - void processManifestList(Metadata & metadata, const Configuration & configuration, ContextPtr context) - { - auto buf = MetadataReadHelper::createReadBuffer(metadata.manifest_list, context, configuration); - auto file_reader = std::make_unique(std::make_unique(*buf)); - - auto data_type = AvroSchemaReader::avroNodeToDataType(file_reader->dataSchema().root()->leafAt(0)); - Block header{{data_type->createColumn(), data_type, "manifest_path"}}; - auto columns = parseAvro(*file_reader, header, getFormatSettings(context)); - auto & col = columns.at(0); - - if (col->getDataType() != TypeIndex::String) - { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `manifest_path` field should be String type, got {}", - col->getFamilyName()); - } - - const auto * col_str = typeid_cast(col.get()); - for (size_t i = 0; i < col_str->size(); ++i) - { - const auto file_path = col_str->getDataAt(i).toView(); - const auto filename = std::filesystem::path(file_path).filename(); - metadata.manifest_files.emplace_back(std::filesystem::path(configuration.url.key) / metadata_directory / filename); - } - } - - /** - * Manifest file has the following format: 
'/iceberg_data/db/table_name/metadata/c87bfec7-d36c-4075-ad04-600b6b0f2020-m0.avro' - * - * `manifest file` is different in format version V1 and V2 and has the following contents: - * v1 v2 - * status req req - * snapshot_id req opt - * sequence_number opt - * file_sequence_number opt - * data_file req req - * Example format version V1: - * ┌─status─┬─────────snapshot_id─┬─data_file───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ - * │ 1 │ 2819310504515118887 │ ('/iceberg_data/db/table_name/data/00000-1-3edca534-15a0-4f74-8a28-4733e0bf1270-00001.parquet','PARQUET',(),100,1070,67108864,[(1,233),(2,210)],[(1,100),(2,100)],[(1,0),(2,0)],[],[(1,'\0'),(2,'0')],[(1,'c'),(2,'99')],NULL,[4],0) │ - * └────────┴─────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ - * Example format version V2: - * ┌─status─┬─────────snapshot_id─┬─sequence_number─┬─file_sequence_number─┬─data_file───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ - * │ 1 │ 5887006101709926452 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ (0,'/iceberg_data/db/table_name/data/00000-1-c8045c90-8799-4eac-b957-79a0484e223c-00001.parquet','PARQUET',(),100,1070,[(1,233),(2,210)],[(1,100),(2,100)],[(1,0),(2,0)],[],[(1,'\0'),(2,'0')],[(1,'c'),(2,'99')],NULL,[4],[],0) │ - * └────────┴─────────────────────┴─────────────────┴──────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ - * In case of partitioned data we'll have extra directory partition=value: - * ─status─┬─────────snapshot_id─┬─data_file──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ - * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=0/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00001.parquet','PARQUET',(0),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0\0'),(2,'1')],[(1,'\0\0\0\0\0\0\0\0'),(2,'1')],NULL,[4],0) │ - * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=1/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00002.parquet','PARQUET',(1),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'2')],[(1,'\0\0\0\0\0\0\0'),(2,'2')],NULL,[4],0) │ - * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=2/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00003.parquet','PARQUET',(2),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'3')],[(1,'\0\0\0\0\0\0\0'),(2,'3')],NULL,[4],0) │ - * 
└────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ - */ - Strings getFilesForRead(const Metadata & metadata, const Configuration & configuration, ContextPtr context) - { - NameSet keys; - for (const auto & manifest_file : metadata.manifest_files) - { - auto buffer = MetadataReadHelper::createReadBuffer(manifest_file, context, configuration); - auto file_reader = std::make_unique(std::make_unique(*buffer)); - - avro::NodePtr root_node = file_reader->dataSchema().root(); - size_t leaves_num = root_node->leaves(); - size_t expected_min_num = metadata.format_version == 1 ? 3 : 2; - if (leaves_num < expected_min_num) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Unexpected number of columns {}. Expected at least {}", - root_node->leaves(), expected_min_num); - } - - avro::NodePtr status_node = root_node->leafAt(0); - if (status_node->type() != avro::Type::AVRO_INT) - { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `status` field should be Int type, got {}", - magic_enum::enum_name(status_node->type())); - } - - avro::NodePtr data_file_node = root_node->leafAt(static_cast(leaves_num) - 1); - if (data_file_node->type() != avro::Type::AVRO_RECORD) - { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `data_file` field should be Tuple type, got {}", - magic_enum::enum_name(data_file_node->type())); - } - - auto status_col_data_type = AvroSchemaReader::avroNodeToDataType(status_node); - auto data_col_data_type = AvroSchemaReader::avroNodeToDataType(data_file_node); - Block header{ - {status_col_data_type->createColumn(), status_col_data_type, "status"}, - {data_col_data_type->createColumn(), data_col_data_type, "data_file"}}; - - const auto columns = parseAvro(*file_reader, header, getFormatSettings(context)); - if (columns.size() != 2) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Unexpected number of columns. Expected 2, got {}", columns.size()); - } - - if (columns.at(0)->getDataType() != TypeIndex::Int32) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `status` field should be Int32 type, got {}", - columns.at(0)->getFamilyName()); - } - if (columns.at(1)->getDataType() != TypeIndex::Tuple) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `file_path` field should be Tuple type, got {}", - columns.at(1)->getFamilyName()); - } - - const auto status_int_column = assert_cast(columns.at(0).get()); - const auto data_file_tuple_column = assert_cast(columns.at(1).get()); - - if (status_int_column->size() != data_file_tuple_column->size()) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `file_path` and `status` have different rows number: {} and {}", - status_int_column->size(), data_file_tuple_column->size()); - } - - const auto * data_file_name_column = metadata.format_version == 1 - ? 
data_file_tuple_column->getColumnPtr(0).get() - : data_file_tuple_column->getColumnPtr(1).get(); - - if (data_file_name_column->getDataType() != TypeIndex::String) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The parsed column from Avro file of `file_path` field should be String type, got {}", - data_file_name_column->getFamilyName()); - } - auto file_name_str_column = assert_cast(data_file_name_column); - - for (size_t i = 0; i < status_int_column->size(); ++i) - { - const auto status = status_int_column->getInt(i); - const auto data_path = std::string(file_name_str_column->getDataAt(i).toView()); - const auto pos = data_path.find(configuration.url.key); - const auto file_path = data_path.substr(pos); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); - - if (status == 2) - { - LOG_TEST(log, "Processing delete file for path: {}", file_path); - chassert(!keys.contains(file_path)); - } - else - keys.insert(file_path); - } - } - - return std::vector(keys.begin(), keys.end()); - } - - MutableColumns parseAvro( - avro::DataFileReaderBase & file_reader, - const Block & header, - const FormatSettings & settings) - { - auto deserializer = std::make_unique(header, file_reader.dataSchema(), true, true, settings); - MutableColumns columns = header.cloneEmptyColumns(); - - file_reader.init(); - RowReadExtension ext; - while (file_reader.hasMore()) - { - file_reader.decr(); - deserializer->deserializeRow(columns, file_reader.decoder(), ext); - } - return columns; - } - -}; - - -template -IcebergMetadataParser::IcebergMetadataParser() : impl(std::make_unique()) -{ -} - -template -Strings IcebergMetadataParser::getFiles(const Configuration & configuration, ContextPtr context) -{ - auto metadata = impl->processMetadataFile(configuration, context); - - /// When table first created and does not have any data - if (metadata.manifest_list.empty()) - return {}; - - impl->processManifestList(metadata, configuration, context); - return impl->getFilesForRead(metadata, configuration, context); -} - - -template IcebergMetadataParser::IcebergMetadataParser(); -template Strings IcebergMetadataParser::getFiles(const StorageS3::Configuration & configuration, ContextPtr); - -} - -#endif diff --git a/src/Storages/DataLakes/IcebergMetadataParser.h b/src/Storages/DataLakes/IcebergMetadataParser.h deleted file mode 100644 index 226b1bd8b6c..00000000000 --- a/src/Storages/DataLakes/IcebergMetadataParser.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. 
- -#include -#include - -namespace DB -{ - -template -struct IcebergMetadataParser -{ -public: - IcebergMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} - -#endif diff --git a/src/Storages/DataLakes/StorageIceberg.h b/src/Storages/DataLakes/StorageIceberg.h deleted file mode 100644 index f1c9c485ef0..00000000000 --- a/src/Storages/DataLakes/StorageIceberg.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO -#include -#include -#endif - -namespace DB -{ - -struct StorageIcebergName -{ - static constexpr auto name = "Iceberg"; -}; - -#if USE_AWS_S3 && USE_AVRO -using StorageIcebergS3 = IStorageDataLake>; -#endif - -} diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp index 1447a4777c5..118600f7212 100644 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ b/src/Storages/DataLakes/registerDataLakes.cpp @@ -4,7 +4,7 @@ #if USE_AWS_S3 #include -#include +#include #include @@ -35,7 +35,7 @@ void registerStorageDeltaLake(StorageFactory & factory) void registerStorageIceberg(StorageFactory & factory) { - REGISTER_DATA_LAKE_STORAGE(StorageIcebergS3, StorageIcebergName::name) + REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) } #endif diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 75330ed7db1..730b694ef87 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -660,7 +660,6 @@ bool HDFSSource::initialize() max_parsing_threads = 1; input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - input_format->setQueryInfo(query_info, getContext()); if (need_only_count) input_format->needOnlyCount(); diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 28fa010b6d2..1587354452e 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -280,7 +280,6 @@ public: auto input_format = FormatFactory::instance().getInput( format, *read_buf, to_read_block, getContext(), max_block_size, updateFormatSettings(current_file), /* max_parsing_threads */ 1); - input_format->setQueryInfo(query_info, getContext()); Pipe pipe(input_format); if (columns_description.hasDefaults()) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index e98b38438b9..75afa00565b 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -168,7 +168,7 @@ void IStorage::readFromPipe( } else { - auto read_step = std::make_unique(std::move(pipe), storage_name, query_info.storage_limits); + auto read_step = std::make_unique(std::move(pipe), storage_name, query_info, context); query_plan.addStep(std::move(read_step)); } } @@ -276,6 +276,16 @@ bool IStorage::isStaticStorage() const return false; } +IStorage::DataValidationTasksPtr IStorage::getCheckTaskList(const ASTPtr & /* query */, ContextPtr /* context */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Check query is not supported for {} storage", getName()); +} + +std::optional IStorage::checkDataNext(DataValidationTasksPtr & /* check_task_list */) +{ + return {}; +} + void IStorage::adjustCreateQueryForBackup(ASTPtr &) const { } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 92920c50ba9..803ab5e92ba 100644 --- a/src/Storages/IStorage.h +++ 
b/src/Storages/IStorage.h @@ -600,8 +600,44 @@ public: /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const { return false; } - /// Checks validity of the data - virtual CheckResults checkData(const ASTPtr & /* query */, ContextPtr /* context */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Check query is not supported for {} storage", getName()); } + + /** A list of tasks to check the validity of data. + * Each IStorage implementation may interpret these tasks in its own way. + * E.g. for some storages checking the data means checking a list of files in the filesystem, for others it can be a list of parts. + * Also it may hold resources (e.g. locks) required during the check. + */ + struct DataValidationTasksBase + { + /// Number of entries left to check. + /// It decreases after each call to checkDataNext(). + virtual size_t size() const = 0; + virtual ~DataValidationTasksBase() = default; + }; + + using DataValidationTasksPtr = std::shared_ptr; + + virtual DataValidationTasksPtr getCheckTaskList(const ASTPtr & /* query */, ContextPtr /* context */); + + /** Executes one task from the list. + * If no tasks are left - returns nullopt. + * Note: the function `checkDataNext` accesses `check_task_list` in a thread-safe way, + * and can be called simultaneously for the same `getCheckTaskList` result + * to process different tasks in parallel. + * Usage: + * + * auto check_task_list = storage.getCheckTaskList(query, context); + * size_t total_tasks = check_task_list->size(); + * while (true) + * { + * size_t tasks_left = check_task_list->size(); + * std::cout << "Checking data: " << (total_tasks - tasks_left) << " / " << total_tasks << " tasks done." << std::endl; + * auto result = storage.checkDataNext(check_task_list); + * if (!result) + * break; + * doSomething(*result); + * } + */ + virtual std::optional checkDataNext(DataValidationTasksPtr & check_task_list); /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. diff --git a/src/Storages/MergeTree/IExecutableTask.h b/src/Storages/MergeTree/IExecutableTask.h index 738056e0ea0..ee453e45071 100644 --- a/src/Storages/MergeTree/IExecutableTask.h +++ b/src/Storages/MergeTree/IExecutableTask.h @@ -30,7 +30,16 @@ class IExecutableTask { public: using TaskResultCallback = std::function; + virtual bool executeStep() = 0; + + /// Sometimes exceptions from executeStep() have already been printed to + /// the log, but with a different level (see + /// ReplicatedMergeMutateTaskBase::executeStep()); the exception should still + /// be thrown, since there are some sanity assertions based on + /// std::uncaught_exceptions() (e.g. WriteBuffer::~WriteBuffer()) + virtual bool printExecutionException() const { return true; } + virtual void onCompleted() = 0; virtual StorageID getStorageID() const = 0; virtual String getQueryId() const = 0; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2d643454ecd..666d9d3815d 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -690,7 +690,6 @@ static ActionsDAGPtr cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs return res; } - /** Calculate expressions, that depend only on constants. * For index to work when something like "WHERE Date = toDate(now())" is written.
*/ diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 93b07b1b8fd..6bc044c4dd4 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -160,7 +160,6 @@ public: bool matchesExactContinuousRange() const; -private: /// The expression is stored as Reverse Polish Notation. struct RPNElement { @@ -207,11 +206,12 @@ private: using RPN = std::vector; using ColumnIndices = std::map; - -public: using AtomMap = std::unordered_map; static const AtomMap atom_map; + const RPN & getRPN() const { return rpn; } + const ColumnIndices & getKeyColumns() const { return key_columns; } + private: BoolMask checkInRange( size_t used_key_size, diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 2df0a6c1c1f..1a7a0b5b2c1 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -281,7 +281,8 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) } catch (...) { - printExceptionWithRespectToAbort(log, query_id); + if (item->task->printExecutionException()) + printExceptionWithRespectToAbort(log, query_id); /// Release the task with exception context. /// An exception context is needed to proper delete write buffers without finalization release_task(std::move(item)); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9bb4d1b9418..b63bb06df9d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,25 +1,27 @@ #include #include +#include +#include #include #include #include #include #include -#include +#include +#include #include -#include #include -#include -#include #include #include #include -#include -#include #include +#include #include -#include +#include +#include +#include +#include #include #include #include @@ -27,31 +29,29 @@ #include #include #include -#include #include #include -#include +#include #include #include +#include #include +#include +#include +#include +#include +#include #include #include #include #include -#include #include -#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include +#include #include #include #include @@ -65,25 +65,24 @@ #include #include #include -#include #include #include #include #include +#include #include -#include -#include +#include +#include #include #include #include -#include +#include +#include +#include #include #include #include #include -#include -#include -#include #include #include @@ -1386,6 +1385,9 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( if (to_state == DataPartState::Active) addPartContributionToDataVolume(res.part); + if (res.part->hasLightweightDelete()) + has_lightweight_delete_parts.store(true); + LOG_TRACE(log, "Finished loading {} part {} on disk {}", magic_enum::enum_name(to_state), part_name, part_disk_ptr->getName()); return res; } @@ -2967,9 +2969,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context NamesAndTypesList columns_to_check_conversion; + auto unfinished_mutations = getUnfinishedMutationCommands(); std::optional name_deps{}; for (const AlterCommand & command : commands) { + checkDropCommandDoesntAffectInProgressMutations(command, unfinished_mutations, local_context); /// Just validate partition expression if (command.partition) { @@ -4323,7 +4327,7 @@ void 
MergeTreeData::delayMutationOrThrowIfNeeded(Poco::Event * until, const Cont if (!num_mutations_to_delay && !num_mutations_to_throw) return; - size_t num_unfinished_mutations = getNumberOfUnfinishedMutations(); + size_t num_unfinished_mutations = getUnfinishedMutationCommands().size(); if (num_mutations_to_throw && num_unfinished_mutations >= num_mutations_to_throw) { ProfileEvents::increment(ProfileEvents::RejectedMutations); @@ -7603,6 +7607,70 @@ bool MergeTreeData::canUsePolymorphicParts() const return canUsePolymorphicParts(*getSettings(), unused); } + +void MergeTreeData::checkDropCommandDoesntAffectInProgressMutations(const AlterCommand & command, const std::map & unfinished_mutations, ContextPtr local_context) const +{ + if (!command.isDropSomething() || unfinished_mutations.empty()) + return; + + auto throw_exception = [] ( + const std::string & mutation_name, + const std::string & entity_name, + const std::string & identifier_name) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot drop {} {} because it's affected by mutation with ID '{}' which is not finished yet. " + "Wait this mutation, or KILL it with command " + "\"KILL MUTATION WHERE mutation_id = '{}'\"", + entity_name, + backQuoteIfNeed(identifier_name), + mutation_name, + mutation_name); + }; + + for (const auto & [mutation_name, commands] : unfinished_mutations) + { + for (const MutationCommand & mutation_command : commands) + { + if (command.type == AlterCommand::DROP_INDEX && mutation_command.index_name == command.index_name) + { + throw_exception(mutation_name, "index", command.index_name); + } + else if (command.type == AlterCommand::DROP_PROJECTION + && mutation_command.projection_name == command.projection_name) + { + throw_exception(mutation_name, "projection", command.projection_name); + } + else if (command.type == AlterCommand::DROP_COLUMN) + { + if (mutation_command.column_name == command.column_name) + throw_exception(mutation_name, "column", command.column_name); + + if (mutation_command.predicate) + { + auto query_tree = buildQueryTree(mutation_command.predicate, local_context); + auto identifiers = collectIdentifiersFullNames(query_tree); + + if (identifiers.contains(command.column_name)) + throw_exception(mutation_name, "column", command.column_name); + } + + for (const auto & [name, expr] : mutation_command.column_to_update_expression) + { + if (name == command.column_name) + throw_exception(mutation_name, "column", command.column_name); + + auto query_tree = buildQueryTree(expr, local_context); + auto identifiers = collectIdentifiersFullNames(query_tree); + if (identifiers.contains(command.column_name)) + throw_exception(mutation_name, "column", command.column_name); + } + } + } + } +} + bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, String & out_reason) const { if (!canUseAdaptiveGranularity()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2639457a376..fbdbf922084 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -47,6 +47,7 @@ namespace DB /// Number of streams is not number parts, but number or parts*files, hence 1000. const size_t DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE = 1000; +struct AlterCommand; class AlterCommands; class InterpreterSelectQuery; class MergeTreePartsMover; @@ -580,9 +581,6 @@ public: /// The decision to delay or throw is made according to settings 'number_of_mutations_to_delay' and 'number_of_mutations_to_throw'. 
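
The checkDropCommandDoesntAffectInProgressMutations() guard added above rejects a DROP when an unfinished mutation either targets the dropped entity directly or references the dropped column in its predicate or UPDATE expressions. A simplified, standalone model of the column-containment part of that check; the struct and names below are illustrative stand-ins, not the MergeTreeData API:

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    /// Illustrative stand-in for one mutation command: the columns it writes and the columns it reads.
    struct FakeMutationCommand
    {
        std::set<std::string> updated_columns;     /// keys of column_to_update_expression
        std::set<std::string> referenced_columns;  /// identifiers collected from predicate / update expressions
    };

    /// Returns the name of the first unfinished mutation that blocks dropping `column`, or an empty string.
    std::string findBlockingMutation(
        const std::map<std::string, std::vector<FakeMutationCommand>> & unfinished_mutations,
        const std::string & column)
    {
        for (const auto & [mutation_name, commands] : unfinished_mutations)
            for (const auto & command : commands)
                if (command.updated_columns.contains(column) || command.referenced_columns.contains(column))
                    return mutation_name;
        return {};
    }

    int main()
    {
        /// A pending "UPDATE b = a + 1 WHERE 1" writes `b` and reads `a`.
        std::map<std::string, std::vector<FakeMutationCommand>> unfinished
            = {{"mutation_5.txt", {{{"b"}, {"a"}}}}};

        std::cout << "DROP COLUMN a blocked by: " << findBlockingMutation(unfinished, "a") << '\n';
        std::cout << "DROP COLUMN c blocked by: "
                  << (findBlockingMutation(unfinished, "c").empty() ? "(nothing)" : "(unexpected)") << '\n';
    }

The real check additionally covers DROP INDEX and DROP PROJECTION by comparing the command's index or projection name against each mutation command, as the implementation above shows.
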
void delayMutationOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; - /// Returns number of unfinished mutations (is_done = 0). - virtual size_t getNumberOfUnfinishedMutations() const = 0; - /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. /// Adds the part in the PreActive state (the part will be added to the active set later with out_transaction->commit()). @@ -719,6 +717,13 @@ public: /// If something is wrong, throws an exception. void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; + /// Throw exception if command is some kind of DROP command (drop column, drop index, etc) + /// and we have unfinished mutation which need this column to finish. + void checkDropCommandDoesntAffectInProgressMutations( + const AlterCommand & command, const std::map & unfinished_mutations, ContextPtr context) const; + /// Return mapping unfinished mutation name -> Mutation command + virtual std::map getUnfinishedMutationCommands() const = 0; + /// Checks if the Mutation can be performed. /// (currently no additional checks: always ok) void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index aabf4d379c4..419950c3037 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -474,7 +474,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( auto step = std::make_unique( std::move(pipe), fmt::format("MergeTree(with {} projection {})", query_info.projection->desc->type, query_info.projection->desc->name), - query_info.storage_limits); + query_info, + context); plan->addStep(std::move(step)); plan->addInterpreterContext(query_info.projection->context); return plan; diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 29bcd8b87f1..f2dcb77ee3b 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -396,6 +396,12 @@ size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const { + const size_t total_arguments = getArgumentsSize(); + if (index >= total_arguments) /// Bug #52632 + throw Exception(ErrorCodes::LOGICAL_ERROR, + "RPNBuilderFunctionTreeNode has {} arguments, attempted to get argument at index {}", + total_arguments, index); + if (ast_node) { const auto * ast_function = assert_cast(ast_node); diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 6ad77119016..94c069d789b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -69,10 +69,9 @@ bool ReplicatedMergeMutateTaskBase::executeStep() else tryLogCurrentException(log, __PRETTY_FUNCTION__); - /** This exception will be written to the queue element, and it can be looked up using `system.replication_queue` table. - * The thread that performs this action will sleep a few seconds after the exception. - * See `queue.processEntry` function. - */ + /// This exception will be written to the queue element, and it can be looked up using `system.replication_queue` table. 
+ /// The thread that performs this action will sleep a few seconds after the exception. + /// See `queue.processEntry` function. throw; } catch (...) @@ -121,6 +120,9 @@ bool ReplicatedMergeMutateTaskBase::executeStep() } } + if (retryable_error) + print_exception = false; + if (saved_exception) std::rethrow_exception(saved_exception); diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index ba514f11f20..18fcacecc9e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -37,6 +37,8 @@ public: String getQueryId() const override { return getStorageID().getShortName() + "::" + selected_entry->log_entry->new_part_name; } bool executeStep() override; + bool printExecutionException() const override { return print_exception; } + protected: using PartLogWriter = std::function; @@ -91,6 +93,7 @@ private: PartLogWriter part_log_writer{}; State state{State::NEED_PREPARE}; IExecutableTask::TaskResultCallback task_result_callback; + bool print_exception = true; }; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index aeaeb4168af..bb74c4dd7bb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "Storages/MutationCommands.h" #include #include @@ -557,7 +558,7 @@ bool ReplicatedMergeTreeQueue::removeFailedQuorumPart(const MergeTreePartInfo & return virtual_parts.remove(part_info); } -int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback, PullLogsReason reason) +std::pair ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback, PullLogsReason reason) { std::lock_guard lock(pull_logs_to_queue_mutex); @@ -589,7 +590,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper /// in the queue. /// With this we ensure that if you read the log state L1 and then the state of mutations M1, /// then L1 "happened-before" M1. - updateMutations(zookeeper); + int32_t mutations_version = updateMutations(zookeeper); if (index_str.empty()) { @@ -718,7 +719,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper storage.background_operations_assignee.trigger(); } - return stat.version; + return std::pair{stat.version, mutations_version}; } @@ -857,11 +858,12 @@ ActiveDataPartSet getPartNamesToMutate( } -void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback) +int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback) { std::lock_guard lock(update_mutations_mutex); - Strings entries_in_zk = zookeeper->getChildrenWatch(fs::path(zookeeper_path) / "mutations", nullptr, watch_callback); + Coordination::Stat mutations_stat; + Strings entries_in_zk = zookeeper->getChildrenWatch(fs::path(zookeeper_path) / "mutations", &mutations_stat, watch_callback); StringSet entries_in_zk_set(entries_in_zk.begin(), entries_in_zk.end()); /// Compare with the local state, delete obsolete entries and determine which new entries to load. 
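
With this change pullLogsToQueue() now reports both the log version and the mutations node version, and updateMutations() returns the version of the `mutations` node it read. A minimal, self-contained sketch of the two ways a caller can consume the new pair return; the stub below only imitates the new signature and is not part of the patch:

    #include <cstdint>
    #include <iostream>
    #include <tuple>
    #include <utility>

    /// Stand-in for the new pullLogsToQueue() signature: {log version, mutations version}.
    std::pair<int32_t, int32_t> pullLogsToQueueStub()
    {
        return {42, 7};
    }

    int main()
    {
        /// Callers that need both versions can unpack the pair with structured bindings.
        auto [log_version, mutations_version] = pullLogsToQueueStub();
        std::cout << "log=" << log_version << " mutations=" << mutations_version << '\n';

        /// Callers that only care about the log version can discard the second element,
        /// which is the pattern the merge predicate uses below with std::tie and std::ignore.
        int32_t merges_version = 0;
        std::tie(merges_version, std::ignore) = pullLogsToQueueStub();
        std::cout << "merges_version=" << merges_version << '\n';
    }
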
@@ -976,6 +978,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C if (some_mutations_are_probably_done) storage.mutations_finalizing_task->schedule(); } + return mutations_stat.version; } @@ -1761,22 +1764,21 @@ size_t ReplicatedMergeTreeQueue::countFinishedMutations() const return count; } -size_t ReplicatedMergeTreeQueue::countUnfinishedMutations() const +std::map ReplicatedMergeTreeQueue::getUnfinishedMutations() const { + std::map result; std::lock_guard lock(state_mutex); - size_t count = 0; - for (const auto & [_, status] : mutations_by_znode | std::views::reverse) + for (const auto & [name, status] : mutations_by_znode | std::views::reverse) { if (status.is_done) break; - ++count; + result.emplace(name, status.entry->commands); } - return count; + return result; } - ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zkutil::ZooKeeperPtr & zookeeper, std::optional && partition_ids_hint) { @@ -2211,7 +2213,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( committing_blocks = std::make_shared(getCommittingBlocks(zookeeper, queue.zookeeper_path, queue.log)); - merges_version = queue_.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::MERGE_PREDICATE); + std::tie(merges_version, std::ignore) = queue_.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::MERGE_PREDICATE); { /// We avoid returning here a version to be used in a lightweight transaction. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 29204611200..ae8ae623a30 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -331,11 +331,11 @@ public: * Additionally loads mutations (so that the set of mutations is always more recent than the queue). * Return the version of "logs" node (that is updated for every merge/mutation/... added to the log) */ - int32_t pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}, PullLogsReason reason = OTHER); + std::pair pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}, PullLogsReason reason = OTHER); /// Load new mutation entries. If something new is loaded, schedule storage.merge_selecting_task. /// If watch_callback is not empty, will call it when new mutations appear in ZK. - void updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback = {}); + int32_t updateMutations(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallbackPtr watch_callback = {}); /// Remove a mutation from ZooKeeper and from the local set. Returns the removed entry or nullptr /// if it could not be found. Called during KILL MUTATION query execution. @@ -388,12 +388,12 @@ public: /// Count the total number of active mutations that are finished (is_done = true). size_t countFinishedMutations() const; - /// Count the total number of active mutations that are not finished (is_done = false). 
- size_t countUnfinishedMutations() const; + + std::map getUnfinishedMutations() const; /// Returns functor which used by MergeTreeMergerMutator to select parts for merge - ReplicatedMergeTreeMergePredicate getMergePredicate(zkutil::ZooKeeperPtr & zookeeper, - std::optional && partition_ids_hint); + ReplicatedMergeTreeMergePredicate + getMergePredicate(zkutil::ZooKeeperPtr & zookeeper, std::optional && partition_ids_hint); MutationCommands getMutationCommands(const MergeTreeData::DataPartPtr & part, Int64 desired_mutation_version, Strings & mutation_ids) const; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 0a182789311..f22d86499c2 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -535,9 +535,20 @@ static StoragePtr create(const StorageFactory::Arguments & args) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); if (!args.storage_def->order_by) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " - "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY ()"); + { + if (args.getLocalContext()->getSettingsRef().create_table_empty_primary_key_by_default) + { + args.storage_def->set(args.storage_def->order_by, makeASTFunction("tuple")); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " + "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY (). " + "Otherwise, you can use the setting 'create_table_empty_primary_key_by_default' to " + "automatically add an empty primary key to the table definition"); + } + } /// Get sorting key from engine arguments. 
/// diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index a3478069356..a011d1d2c2f 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -346,7 +346,7 @@ void StorageNATS::read( } else { - auto read_step = std::make_unique(std::move(pipe), getName(), query_info.storage_limits); + auto read_step = std::make_unique(std::move(pipe), getName(), query_info, local_context); query_plan.addStep(std::move(read_step)); query_plan.addInterpreterContext(modified_context); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ec552dd1032..f78424359bd 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -757,7 +757,7 @@ void StorageRabbitMQ::read( } else { - auto read_step = std::make_unique(std::move(pipe), getName(), query_info.storage_limits); + auto read_step = std::make_unique(std::move(pipe), getName(), query_info, local_context); query_plan.addStep(std::move(read_step)); query_plan.addInterpreterContext(modified_context); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index be322a402ee..5e8d54bcdf1 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -609,5 +609,19 @@ void registerStorageEmbeddedRocksDB(StorageFactory & factory) factory.registerStorage("EmbeddedRocksDB", create, features); } +std::optional StorageEmbeddedRocksDB::totalRows(const Settings & settings) const +{ + if (settings.optimize_trivial_approximate_count_query) + { + std::shared_lock lock(rocksdb_ptr_mx); + if (!rocksdb_ptr) + return {}; + UInt64 estimated_rows; + if (!rocksdb_ptr->GetIntProperty("rocksdb.estimate-num-keys", &estimated_rows)) + return {}; + return estimated_rows; + } + return {}; +} } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index d0cf05f261c..336f6a8abe3 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -83,6 +83,10 @@ public: bool supportsDelete() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } + + std::optional totalRows(const Settings & settings) const override; + private: const String primary_key; using RocksDBPtr = std::unique_ptr; diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 6ea222df71f..5d957d885f5 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -99,11 +99,6 @@ StorageS3QueueSource::StorageS3QueueSource( , remove_file_func(remove_file_func_) , log(&Poco::Logger::get("StorageS3QueueSource")) { - reader = std::move(internal_source->reader); - if (reader) - { - reader_future = std::move(internal_source->reader_future); - } } StorageS3QueueSource::~StorageS3QueueSource() @@ -116,8 +111,22 @@ String StorageS3QueueSource::getName() const return name; } +void StorageS3QueueSource::lazyInitialize() +{ + if (initialized) + return; + + internal_source->lazyInitialize(); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); + initialized = true; +} + Chunk StorageS3QueueSource::generate() { + lazyInitialize(); + while (true) { if (!reader) diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 
7c8eb3eeb74..8af5256899a 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -93,8 +93,10 @@ private: using ReaderHolder = StorageS3Source::ReaderHolder; ReaderHolder reader; std::future reader_future; + std::atomic initialized{false}; size_t processed_rows_from_file = 0; + void lazyInitialize(); void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); }; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 796c732ba3f..b005f1a4445 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -1137,7 +1137,6 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() format, *read_buf, sample_block, getContext(), max_block_size, format_settings, max_parsing_threads, std::nullopt, /* is_remote_fs */ true, compression_method); - input_format->setQueryInfo(query_info, getContext()); if (need_only_count) input_format->needOnlyCount(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 1f4d14218aa..179c23a974b 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -981,7 +981,7 @@ static std::chrono::seconds getLockTimeout(ContextPtr context) using StorageFilePtr = std::shared_ptr; -class StorageFileSource : public ISource +class StorageFileSource : public SourceWithKeyCondition { public: class FilesIterator @@ -1056,7 +1056,7 @@ public: FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_, bool need_only_count_) - : ISource(info.source_header, false) + : SourceWithKeyCondition(info.source_header, false) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) , files_iterator(std::move(files_iterator_)) @@ -1143,6 +1143,17 @@ public: return storage->getName(); } + void setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_) override + { + setKeyConditionImpl(query_info_, context_, block_for_format); + } + + void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override + { + setKeyConditionImpl(nodes, context_, block_for_format); + } + + bool tryGetCountFromCache(const struct stat & file_stat) { if (!context->getSettingsRef().use_cache_for_count_from_files) @@ -1296,7 +1307,10 @@ public: input_format = FormatFactory::instance().getInput( storage->format_name, *read_buf, block_for_format, context, max_block_size, storage->format_settings, max_parsing_threads, std::nullopt, /*is_remote_fs*/ false, CompressionMethod::None, need_only_count); - input_format->setQueryInfo(query_info, context); + + if (key_condition) + input_format->setKeyCondition(key_condition); + if (need_only_count) input_format->needOnlyCount(); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 4626d744a38..dc68c68a21b 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -85,6 +85,8 @@ public: const Names & getKeyNames() const { return key_names; } + bool supportsTrivialCountOptimization() const override { return true; } + private: Block sample_block; const Names key_names; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index cf76f7a16ba..10f62cc9432 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -7,6 +7,8 @@ #include +#include + #include #include #include 
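
The StorageFile change above swaps the removed setQueryInfo() call for a key condition that is handed to the input format, so the reader itself can skip data that cannot match. A minimal, self-contained sketch of that push-down shape; the Predicate and Reader names are illustrative and not the SourceWithKeyCondition API:

    #include <functional>
    #include <iostream>
    #include <optional>
    #include <utility>
    #include <vector>

    /// Illustrative predicate: decides whether a row (here just an integer key) can match the query.
    using Predicate = std::function<bool(int)>;

    /// Stand-in for an input format that can use a pushed-down condition to skip rows early.
    class Reader
    {
    public:
        void setKeyCondition(Predicate condition) { key_condition = std::move(condition); }

        std::vector<int> read(const std::vector<int> & rows) const
        {
            std::vector<int> result;
            for (int row : rows)
                if (!key_condition || (*key_condition)(row))  /// No condition: read everything.
                    result.push_back(row);
            return result;
        }

    private:
        std::optional<Predicate> key_condition;
    };

    int main()
    {
        Reader reader;
        /// The source builds the condition once from the query and passes only that to the reader,
        /// instead of handing over the whole query info.
        reader.setKeyCondition([](int key) { return key >= 10; });

        for (int value : reader.read({1, 7, 10, 42}))
            std::cout << value << ' ';   /// prints: 10 42
        std::cout << '\n';
    }
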
@@ -57,6 +59,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; extern const int CANNOT_RESTORE_TABLE; + extern const int NOT_IMPLEMENTED; } /// NOTE: The lock `StorageLog::rwlock` is NOT kept locked while reading, @@ -874,15 +877,23 @@ SinkToStoragePtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetada return std::make_shared(*this, metadata_snapshot, std::move(lock)); } -CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr local_context) +IStorage::DataValidationTasksPtr StorageLog::getCheckTaskList(const ASTPtr & query, ContextPtr local_context) { + const auto * check_query = query->as(); + if (check_query->partition || !check_query->part_name.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CHECK PART/PARTITION are not supported for {}", getName()); + ReadLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); - return file_checker.check(); + return std::make_unique(file_checker.getDataValidationTasks(), std::move(lock)); } +std::optional StorageLog::checkDataNext(DataValidationTasksPtr & check_task_list) +{ + return file_checker.checkNextEntry(assert_cast(check_task_list.get())->file_checker_tasks); +} IStorage::ColumnSizeByName StorageLog::getColumnSizes() const { diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index f1d05ed39ac..ee5bcc009e7 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -59,7 +59,8 @@ public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; - CheckResults checkData(const ASTPtr & query, ContextPtr local_context) override; + DataValidationTasksPtr getCheckTaskList(const ASTPtr & query, ContextPtr context) override; + std::optional checkDataNext(DataValidationTasksPtr & check_task_list) override; void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; @@ -142,6 +143,19 @@ private: std::atomic total_rows = 0; std::atomic total_bytes = 0; + struct DataValidationTasks : public IStorage::DataValidationTasksBase + { + DataValidationTasks(FileChecker::DataValidationTasksPtr file_checker_tasks_, ReadLock && lock_) + : file_checker_tasks(std::move(file_checker_tasks_)), lock(std::move(lock_)) + {} + + size_t size() const override { return file_checker_tasks->size(); } + + FileChecker::DataValidationTasksPtr file_checker_tasks; + /// Lock to prevent table modification while checking + ReadLock lock; + }; + FileChecker file_checker; const size_t max_compress_block_size; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4897da37a41..0659b6b48cc 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -702,6 +702,33 @@ std::optional StorageMergeTree::getIncompleteMutationsS return result; } +std::map StorageMergeTree::getUnfinishedMutationCommands() const +{ + std::lock_guard lock(currently_processing_in_background_mutex); + std::vector part_versions_with_names; + auto data_parts = getDataPartsVectorForInternalUsage(); + part_versions_with_names.reserve(data_parts.size()); + for (const auto & part : data_parts) + part_versions_with_names.emplace_back(PartVersionWithName{part->info.getDataVersion(), part->name}); + std::sort(part_versions_with_names.begin(), part_versions_with_names.end(), comparator); + + std::map result; + + for (const auto & kv : current_mutations_by_version) + { + Int64 
mutation_version = kv.first; + const MergeTreeMutationEntry & entry = kv.second; + const PartVersionWithName needle{mutation_version, ""}; + auto versions_it = std::lower_bound( + part_versions_with_names.begin(), part_versions_with_names.end(), needle, comparator); + + size_t parts_to_do = versions_it - part_versions_with_names.begin(); + if (parts_to_do > 0) + result.emplace(entry.file_name, entry.commands); + } + return result; +} + std::vector StorageMergeTree::getMutationsStatus() const { std::lock_guard lock(currently_processing_in_background_mutex); @@ -1381,26 +1408,6 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign return scheduled; } -size_t StorageMergeTree::getNumberOfUnfinishedMutations() const -{ - std::unique_lock lock(currently_processing_in_background_mutex); - - size_t count = 0; - for (const auto & [version, _] : current_mutations_by_version | std::views::reverse) - { - auto status = getIncompleteMutationsStatusUnlocked(version, lock, nullptr, true); - if (!status) - continue; - - if (status->is_done) - break; - - ++count; - } - - return count; -} - UInt64 StorageMergeTree::getCurrentMutationVersion( const DataPartPtr & part, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) const @@ -2200,19 +2207,33 @@ void StorageMergeTree::onActionLockRemove(StorageActionBlockType action_type) background_moves_assignee.trigger(); } -CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_context) +IStorage::DataValidationTasksPtr StorageMergeTree::getCheckTaskList(const ASTPtr & query, ContextPtr local_context) { - CheckResults results; DataPartsVector data_parts; if (const auto & check_query = query->as(); check_query.partition) { String partition_id = getPartitionIDFromQuery(check_query.partition, local_context); data_parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); } + else if (!check_query.part_name.empty()) + { + auto part = getPartIfExists(check_query.part_name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + if (!part) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No such data part '{}' to check in table '{}'", + check_query.part_name, getStorageID().getFullTableName()); + data_parts.emplace_back(std::move(part)); + } else data_parts = getVisibleDataPartsVector(local_context); - for (auto & part : data_parts) + return std::make_unique(std::move(data_parts), local_context); +} + +std::optional StorageMergeTree::checkDataNext(DataValidationTasksPtr & check_task_list) +{ + auto * data_validation_tasks = assert_cast(check_task_list.get()); + auto local_context = data_validation_tasks->context; + if (auto part = data_validation_tasks->next()) { /// If the checksums file is not present, calculate the checksums and write them to disk. static constexpr auto checksums_path = "checksums.txt"; @@ -2226,7 +2247,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ auto & part_mutable = const_cast(*part); part_mutable.writeChecksums(part->checksums, local_context->getWriteSettings()); - results.emplace_back(part->name, true, "Checksums recounted and written to disk."); + return CheckResult(part->name, true, "Checksums recounted and written to disk."); } catch (...) 
{ @@ -2234,7 +2255,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ throw; tryLogCurrentException(log, __PRETTY_FUNCTION__); - results.emplace_back(part->name, false, "Check of part finished with error: '" + getCurrentExceptionMessage(false) + "'"); + return CheckResult(part->name, false, "Check of part finished with error: '" + getCurrentExceptionMessage(false) + "'"); } } else @@ -2242,18 +2263,19 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ try { checkDataPart(part, true); - results.emplace_back(part->name, true, ""); + return CheckResult(part->name, true, ""); } catch (...) { if (isRetryableException(std::current_exception())) throw; - results.emplace_back(part->name, false, getCurrentExceptionMessage(false)); + return CheckResult(part->name, false, getCurrentExceptionMessage(false)); } } } - return results; + + return {}; } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 89da9ab839e..51922b62587 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -108,11 +108,12 @@ public: void onActionLockRemove(StorageActionBlockType action_type) override; - CheckResults checkData(const ASTPtr & query, ContextPtr context) override; + DataValidationTasksPtr getCheckTaskList(const ASTPtr & query, ContextPtr context) override; + std::optional checkDataNext(DataValidationTasksPtr & check_task_list) override; bool scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) override; - size_t getNumberOfUnfinishedMutations() const override; + std::map getUnfinishedMutationCommands() const override; MergeTreeDeduplicationLog * getDeduplicationLog() { return deduplication_log.get(); } @@ -278,6 +279,32 @@ private: friend class MergePlainMergeTreeTask; friend class MutatePlainMergeTreeTask; + struct DataValidationTasks : public IStorage::DataValidationTasksBase + { + DataValidationTasks(DataPartsVector && parts_, ContextPtr context_) + : parts(std::move(parts_)), it(parts.begin()), context(std::move(context_)) + {} + + DataPartPtr next() + { + std::lock_guard lock(mutex); + if (it == parts.end()) + return nullptr; + return *(it++); + } + + size_t size() const override + { + std::lock_guard lock(mutex); + return std::distance(it, parts.end()); + } + + mutable std::mutex mutex; + DataPartsVector parts; + DataPartsVector::const_iterator it; + + ContextPtr context; + }; protected: std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const override; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index f6dd7064a22..f7ee936db8d 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -46,6 +46,8 @@ public: bool supportsParallelInsert() const override { return true; } + bool supportsSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, bool) override { return std::make_shared(metadata_snapshot->getSampleBlock()); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index d4357ffd3ac..fdeadfff95b 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -149,8 +149,11 @@ public: return getNested()->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } - CheckResults checkData(const ASTPtr & query, ContextPtr context) override { return getNested()->checkData(query, context); } + DataValidationTasksPtr getCheckTaskList(const ASTPtr & query, ContextPtr context) override 
{ return getNested()->getCheckTaskList(query, context); } + std::optional checkDataNext(DataValidationTasksPtr & check_task_list) override { return getNested()->checkDataNext(check_task_list); } + void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override { getNested()->checkTableCanBeDropped(query_context); } + bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 14e1a65ce92..069ed20c730 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5739,6 +5739,7 @@ void StorageReplicatedMergeTree::alter( return; } + auto ast_to_str = [](ASTPtr query) -> String { if (!query) @@ -5753,6 +5754,31 @@ void StorageReplicatedMergeTree::alter( while (true) { + if (shutdown_called || partial_shutdown_called) + throw Exception(ErrorCodes::ABORTED, "Cannot assign alter because shutdown called"); + + bool pulled_queue = false; + std::optional maybe_mutations_version_after_logs_pull; + std::map unfinished_mutations; + for (const auto & command : commands) + { + if (command.isDropSomething()) + { + if (shutdown_called || partial_shutdown_called) + throw Exception(ErrorCodes::ABORTED, "Cannot assign alter because shutdown called"); + + if (!pulled_queue) + { + auto [_, mutations_version] = queue.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::SYNC); + maybe_mutations_version_after_logs_pull.emplace(mutations_version); + unfinished_mutations = getUnfinishedMutationCommands(); + pulled_queue = true; + } + + checkDropCommandDoesntAffectInProgressMutations(command, unfinished_mutations, query_context); + } + } + /// Clear nodes from previous iteration alter_entry.emplace(); mutation_znode.reset(); @@ -5866,8 +5892,18 @@ void StorageReplicatedMergeTree::alter( mutation_entry.source_replica = replica_name; mutation_entry.commands = std::move(maybe_mutation_commands); - Coordination::Stat mutations_stat; - zookeeper->get(mutations_path, &mutations_stat); + int32_t mutations_version; + if (maybe_mutations_version_after_logs_pull.has_value()) + { + mutations_version = *maybe_mutations_version_after_logs_pull; + } + else + { + Coordination::Stat mutations_stat; + zookeeper->get(mutations_path, &mutations_stat); + mutations_version = mutations_stat.version; + } + partition_block_numbers_holder = allocateBlockNumbersInAffectedPartitions(mutation_entry.commands, query_context, zookeeper); @@ -5875,7 +5911,7 @@ void StorageReplicatedMergeTree::alter( mutation_entry.block_numbers = partition_block_numbers_holder.getBlockNumbers(); mutation_entry.create_time = time(nullptr); - ops.emplace_back(zkutil::makeSetRequest(mutations_path, String(), mutations_stat.version)); + ops.emplace_back(zkutil::makeSetRequest(mutations_path, String(), mutations_version)); mutation_path_idx = ops.size(); ops.emplace_back( zkutil::makeCreateRequest(fs::path(mutations_path) / "", mutation_entry.toString(), zkutil::CreateMode::PersistentSequential)); @@ -8562,36 +8598,46 @@ void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, t part_check_thread.enqueuePart(part_name, delay_to_check_seconds); } -CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, ContextPtr local_context) 
+IStorage::DataValidationTasksPtr StorageReplicatedMergeTree::getCheckTaskList(const ASTPtr & query, ContextPtr local_context) { - CheckResults results; DataPartsVector data_parts; if (const auto & check_query = query->as(); check_query.partition) { String partition_id = getPartitionIDFromQuery(check_query.partition, local_context); data_parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); } + else if (!check_query.part_name.empty()) + { + auto part = getPartIfExists(check_query.part_name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + if (!part) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No such data part '{}' to check in table '{}'", + check_query.part_name, getStorageID().getFullTableName()); + data_parts.emplace_back(std::move(part)); + } else data_parts = getVisibleDataPartsVector(local_context); - { - auto part_check_lock = part_check_thread.pausePartsCheck(); + auto part_check_lock = part_check_thread.pausePartsCheck(); + return std::make_unique(std::move(data_parts), std::move(part_check_lock)); +} - for (auto & part : data_parts) +std::optional StorageReplicatedMergeTree::checkDataNext(DataValidationTasksPtr & check_task_list) +{ + + if (auto part = assert_cast(check_task_list.get())->next()) + { + try { - try - { - results.push_back(part_check_thread.checkPartAndFix(part->name)); - } - catch (const Exception & ex) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'"); - } + return CheckResult(part_check_thread.checkPartAndFix(part->name)); + } + catch (const Exception & ex) + { + tryLogCurrentException(log, __PRETTY_FUNCTION__); + return CheckResult(part->name, false, "Check of part finished with error: '" + ex.message() + "'"); } } - return results; + return {}; } @@ -8701,9 +8747,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const return toString(table_shared_id); } -size_t StorageReplicatedMergeTree::getNumberOfUnfinishedMutations() const +std::map StorageReplicatedMergeTree::getUnfinishedMutationCommands() const { - return queue.countUnfinishedMutations(); + return queue.getUnfinishedMutations(); } void StorageReplicatedMergeTree::createTableSharedID() const diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 794991d8e06..347f78199d3 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -230,7 +230,8 @@ public: /// Add a part to the queue of parts whose data you want to check in the background thread. void enqueuePartForCheck(const String & part_name, time_t delay_to_check_seconds = 0); - CheckResults checkData(const ASTPtr & query, ContextPtr context) override; + DataValidationTasksPtr getCheckTaskList(const ASTPtr & query, ContextPtr context) override; + std::optional checkDataNext(DataValidationTasksPtr & check_task_list) override; /// Checks ability to use granularity bool canUseAdaptiveGranularity() const override; @@ -347,7 +348,7 @@ public: // Return table id, common for different replicas String getTableSharedID() const override; - size_t getNumberOfUnfinishedMutations() const override; + std::map getUnfinishedMutationCommands() const override; /// Returns the same as getTableSharedID(), but extracts it from a create query. 
static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); @@ -995,6 +996,34 @@ private: bool waitZeroCopyLockToDisappear(const ZeroCopyLock & lock, size_t milliseconds_to_wait) override; void startupImpl(bool from_attach_thread); + + struct DataValidationTasks : public IStorage::DataValidationTasksBase + { + explicit DataValidationTasks(DataPartsVector && parts_, std::unique_lock && parts_check_lock_) + : parts_check_lock(std::move(parts_check_lock_)), parts(std::move(parts_)), it(parts.begin()) + {} + + DataPartPtr next() + { + std::lock_guard lock(mutex); + if (it == parts.end()) + return nullptr; + return *(it++); + } + + size_t size() const override + { + std::lock_guard lock(mutex); + return std::distance(it, parts.end()); + } + + std::unique_lock parts_check_lock; + + mutable std::mutex mutex; + DataPartsVector parts; + DataPartsVector::const_iterator it; + }; + }; String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ebfd36fd9c8..c96c5e3710e 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -498,7 +498,7 @@ size_t StorageS3Source::KeysIterator::estimatedKeysCount() StorageS3Source::ReadTaskIterator::ReadTaskIterator( const DB::ReadTaskCallback & callback_, - const size_t max_threads_count) + size_t max_threads_count) : callback(callback_) { ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, max_threads_count); @@ -546,7 +546,7 @@ StorageS3Source::StorageS3Source( const size_t max_parsing_threads_, bool need_only_count_, std::optional query_info_) - : ISource(info.source_header, false) + : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) @@ -569,9 +569,17 @@ StorageS3Source::StorageS3Source( , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) { +} + +void StorageS3Source::lazyInitialize() +{ + if (initialized) + return; + reader = createReader(); if (reader) reader_future = createReaderAsync(); + initialized = true; } StorageS3Source::ReaderHolder StorageS3Source::createReader() @@ -620,8 +628,8 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() compression_method, need_only_count); - if (query_info.has_value()) - input_format->setQueryInfo(query_info.value(), getContext()); + if (key_condition) + input_format->setKeyCondition(key_condition); if (need_only_count) input_format->needOnlyCount(); @@ -736,6 +744,8 @@ String StorageS3Source::getName() const Chunk StorageS3Source::generate() { + lazyInitialize(); + while (true) { if (isCancelled() || !reader) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 53dd47b85a1..714b9c68baa 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include @@ -36,7 +36,7 @@ namespace DB class PullingPipelineExecutor; class NamedCollection; -class StorageS3Source : public ISource, WithContext +class StorageS3Source : public SourceWithKeyCondition, WithContext { public: @@ -120,7 +120,7 @@ public: class ReadTaskIterator : public IIterator { public: - explicit ReadTaskIterator(const ReadTaskCallback & callback_, const size_t max_threads_count); + explicit ReadTaskIterator(const 
ReadTaskCallback & callback_, size_t max_threads_count); KeyWithInfoPtr next() override; size_t estimatedKeysCount() override; @@ -154,6 +154,16 @@ public: String getName() const override; + void setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_) override + { + setKeyConditionImpl(query_info_, context_, sample_block); + } + + void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override + { + setKeyConditionImpl(nodes, context_, sample_block); + } + Chunk generate() override; private: @@ -245,9 +255,14 @@ private: ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; + std::atomic initialized{false}; size_t total_rows_in_file = 0; + /// Notice: we should initialize reader and future_reader lazily in generate to make sure key_condition + /// is set before createReader is invoked for key_condition is read in createReader. + void lazyInitialize(); + /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(); std::future createReaderAsync(); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 83336cbd22e..3be74077073 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -403,16 +403,18 @@ SinkToStoragePtr StorageStripeLog::write(const ASTPtr & /*query*/, const Storage return std::make_shared(*this, metadata_snapshot, std::move(lock)); } - -CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, ContextPtr local_context) +IStorage::DataValidationTasksPtr StorageStripeLog::getCheckTaskList(const ASTPtr & /* query */, ContextPtr local_context) { ReadLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); - - return file_checker.check(); + return std::make_unique(file_checker.getDataValidationTasks(), std::move(lock)); } +std::optional StorageStripeLog::checkDataNext(DataValidationTasksPtr & check_task_list) +{ + return file_checker.checkNextEntry(assert_cast(check_task_list.get())->file_checker_tasks); +} void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) { diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index f889a1de71b..636de56e9d9 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -53,7 +53,8 @@ public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; - CheckResults checkData(const ASTPtr & query, ContextPtr ocal_context) override; + DataValidationTasksPtr getCheckTaskList(const ASTPtr & query, ContextPtr context) override; + std::optional checkDataNext(DataValidationTasksPtr & check_task_list) override; bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } @@ -93,6 +94,20 @@ private: const DiskPtr disk; String table_path; + struct DataValidationTasks : public IStorage::DataValidationTasksBase + { + DataValidationTasks(FileChecker::DataValidationTasksPtr file_checker_tasks_, ReadLock && lock_) + : file_checker_tasks(std::move(file_checker_tasks_)), lock(std::move(lock_)) + {} + + size_t size() const override { return file_checker_tasks->size(); } + + FileChecker::DataValidationTasksPtr file_checker_tasks; + + /// Lock to prevent table modification while checking + ReadLock lock; + }; + String data_file_path; String index_file_path; FileChecker 
file_checker; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index ffb92db9279..b58b4b23320 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -272,12 +272,12 @@ StorageURLSource::StorageURLSource( const ConnectionTimeouts & timeouts, CompressionMethod compression_method, size_t max_parsing_threads, - const SelectQueryInfo & query_info, + const SelectQueryInfo &, const HTTPHeaderEntries & headers_, const URIParams & params, bool glob_url, bool need_only_count_) - : ISource(info.source_header, false), WithContext(context_) + : SourceWithKeyCondition(info.source_header, false), WithContext(context_) , name(std::move(name_)) , columns_description(info.columns_description) , requested_columns(info.requested_columns) @@ -358,7 +358,9 @@ StorageURLSource::StorageURLSource( /* is_remote_ fs */ true, compression_method, need_only_count); - input_format->setQueryInfo(query_info, getContext()); + + if (key_condition) + input_format->setKeyCondition(key_condition); if (need_only_count) input_format->needOnlyCount(); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 8257bd65f9c..04420062170 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -127,7 +127,7 @@ private: }; -class StorageURLSource : public ISource, WithContext +class StorageURLSource : public SourceWithKeyCondition, WithContext { using URIParams = std::vector>; @@ -169,6 +169,16 @@ public: String getName() const override { return name; } + void setKeyCondition(const SelectQueryInfo & query_info_, ContextPtr context_) override + { + setKeyConditionImpl(query_info_, context_, block_for_format); + } + + void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override + { + setKeyConditionImpl(nodes, context_, block_for_format); + } + Chunk generate() override; static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri); diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 6d50e9138ff..d8524963776 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -33,7 +33,7 @@ protected: if (TableFunction::configuration.structure != "auto") columns = parseColumnsListFromString(TableFunction::configuration.structure, context); - StoragePtr storage = std::make_shared( + StoragePtr storage = Storage::create( TableFunction::configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt); diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp index 5407d647f0f..d37aace01c6 100644 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ b/src/TableFunctions/TableFunctionIceberg.cpp @@ -2,7 +2,7 @@ #if USE_AWS_S3 && USE_AVRO -#include +#include #include #include #include @@ -17,7 +17,7 @@ struct TableFunctionIcebergName static constexpr auto name = "iceberg"; }; -using TableFunctionIceberg = ITableFunctionDataLake; +using TableFunctionIceberg = ITableFunctionDataLake; void registerTableFunctionIceberg(TableFunctionFactory & factory) { diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index eb124f110bf..de04f8adc4b 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -84,4 +84,6 @@ 
02818_parameterized_view_with_cte_multiple_usage 01940_custom_tld_sharding_key 02815_range_dict_no_direct_join +02845_threads_count_in_distributed_queries 02861_join_on_nullsafe_compare +01019_alter_materialized_view_consistent diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 8cc40839706..ef54191620d 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -65,6 +65,9 @@ def get_options(i: int, upgrade_check: bool) -> str: f"partial_result_update_duration_ms={random.randint(10, 1000)}" ) + if random.random() < 0.1: + client_options.append("optimize_trivial_approximate_count_query=1") + if client_options: options.append(" --client-option " + " ".join(client_options)) @@ -109,9 +112,11 @@ def compress_stress_logs(output_path: Path, files_prefix: str) -> None: def call_with_retry(query: str, timeout: int = 30, retry_count: int = 5) -> None: + logging.info("Running command: %s", str(query)) for i in range(retry_count): code = call(query, shell=True, stderr=STDOUT, timeout=timeout) if code != 0: + logging.info("Command returned %s, retrying", str(code)) time.sleep(i) else: break @@ -129,17 +134,17 @@ def prepare_for_hung_check(drop_databases: bool) -> bool: # We attach gdb to clickhouse-server before running tests # to print stacktraces of all crashes even if clickhouse cannot print it for some reason. - # However, it obstruct checking for hung queries. + # However, it obstructs checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") + call_with_retry("tail --pid=$(pidof gdb) -f /dev/null") # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb - call_with_retry("kill -CONT $(lsof -ti:9000)") + call_with_retry( + "kill -CONT $(cat /var/run/clickhouse-server/clickhouse-server.pid) && clickhouse client -q 'SELECT 1 FORMAT Null'" + ) # ThreadFuzzer significantly slows down server and causes false-positive hung check failures - call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") - - call_with_retry(make_query_command("SELECT 1 FORMAT Null")) - + call_with_retry(make_query_command("SYSTEM STOP THREAD FUZZER")) # Some tests execute SYSTEM STOP MERGES or similar queries. # It may cause some ALTERs to hang. # Possibly we should fix tests and forbid to use such queries without specifying table.
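Note on the CHECK TABLE changes in the storage diffs above: checkData(), which returned a complete CheckResults vector in one call, is split into getCheckTaskList() plus a repeatedly-called checkDataNext(), so that several threads can pull parts from one shared task list (which is why the tests below pin max_threads). The following standalone sketch only models that protocol; DataValidationTasks, CheckResult and the worker loop here are simplified stand-ins for illustration, not ClickHouse's actual classes.

```cpp
#include <iostream>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <vector>

struct CheckResult { std::string part_name; bool success; std::string message; };

/// Stand-in for IStorage::DataValidationTasksBase: a thread-safe pool of pending checks.
struct DataValidationTasks
{
    explicit DataValidationTasks(std::vector<std::string> parts_) : parts(std::move(parts_)), it(parts.begin()) {}

    std::optional<std::string> next()
    {
        std::lock_guard lock(mutex);
        if (it == parts.end())
            return std::nullopt;
        return *(it++);
    }

    std::mutex mutex;
    std::vector<std::string> parts;
    std::vector<std::string>::iterator it;
};

/// Stand-in for checkDataNext(): checks one pending part and returns its result,
/// or std::nullopt once the task list is exhausted.
std::optional<CheckResult> checkDataNext(DataValidationTasks & tasks)
{
    if (auto part = tasks.next())
        return CheckResult{*part, true, ""};
    return std::nullopt;
}

int main()
{
    /// Stand-in for getCheckTaskList(): build the list of parts to validate up front.
    auto tasks = std::make_unique<DataValidationTasks>(std::vector<std::string>{"201901_1_1_0", "201902_2_2_0"});

    std::mutex results_mutex;
    std::vector<CheckResult> results;

    /// Several workers drain the same task list; this is what lets CHECK TABLE
    /// honour max_threads instead of checking parts strictly one by one.
    std::vector<std::thread> workers;
    for (size_t i = 0; i < 2; ++i)
        workers.emplace_back([&]
        {
            while (auto result = checkDataNext(*tasks))
            {
                std::lock_guard lock(results_mutex);
                results.push_back(std::move(*result));
            }
        });
    for (auto & worker : workers)
        worker.join();

    for (const auto & r : results)
        std::cout << r.part_name << '\t' << r.success << '\t' << r.message << '\n';
}
```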
diff --git a/tests/config/install.sh b/tests/config/install.sh index 9e3b235515d..a5037bfb64e 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -169,11 +169,17 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] chgrp clickhouse /etc/clickhouse-server2 sudo -u clickhouse cp -r /etc/clickhouse-server/* /etc/clickhouse-server1 sudo -u clickhouse cp -r /etc/clickhouse-server/* /etc/clickhouse-server2 + rm /etc/clickhouse-server1/config.d/macros.xml rm /etc/clickhouse-server2/config.d/macros.xml sudo -u clickhouse cat /etc/clickhouse-server/config.d/macros.xml | sed "s|r1|r2|" > /etc/clickhouse-server1/config.d/macros.xml sudo -u clickhouse cat /etc/clickhouse-server/config.d/macros.xml | sed "s|s1|s2|" > /etc/clickhouse-server2/config.d/macros.xml + rm /etc/clickhouse-server1/config.d/transactions.xml + rm /etc/clickhouse-server2/config.d/transactions.xml + sudo -u clickhouse cat /etc/clickhouse-server/config.d/transactions.xml | sed "s|/test/clickhouse/txn|/test/clickhouse/txn1|" > /etc/clickhouse-server1/config.d/transactions.xml + sudo -u clickhouse cat /etc/clickhouse-server/config.d/transactions.xml | sed "s|/test/clickhouse/txn|/test/clickhouse/txn2|" > /etc/clickhouse-server2/config.d/transactions.xml + sudo mkdir -p /var/lib/clickhouse1 sudo mkdir -p /var/lib/clickhouse2 sudo chown clickhouse /var/lib/clickhouse1 diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 478bb5ebf72..79d498b909f 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -143,6 +143,9 @@ class KeeperClient(object): def find_super_nodes(self, threshold: int, timeout: float = 60.0) -> str: return self.execute_query(f"find_super_nodes {threshold}", timeout) + def get_direct_children_number(self, path: str, timeout: float = 60.0) -> str: + return self.execute_query(f"get_direct_children_number {path}", timeout) + def get_all_children_number(self, path: str, timeout: float = 60.0) -> str: return self.execute_query(f"get_all_children_number {path}", timeout) diff --git a/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf_l.xml b/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf_l.xml new file mode 100644 index 00000000000..7f866e9beed --- /dev/null +++ b/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf_l.xml @@ -0,0 +1,52 @@ + + + + + 0 + + + s3 + https://vdimir-test2.s3.amazonaws.com/ttt/ + AKIAZURMN3FVQCQT6Y5U + pTfhdJgl4HOSIgL+aIE/pnGTZ7IAXMMcYvGhiDnb + eu-central-1 + /var/lib/clickhouse/gcs/ + false + + + cache + s3 + /var/lib/clickhouse/s3_cache/ + 10Gi + + + + + + + default + 10000000 + +
+ s3_cache +
+
+ 0.99 +
+ + + + + default + + + s3 + + + + +
+
+ + true +
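Note on the StorageS3Source / StorageURLSource changes earlier in this patch: the input reader can no longer be built in the constructor, because setKeyCondition() is only called after the source object is constructed, so reader creation is deferred to the first generate() call via lazyInitialize(). This is a minimal standalone sketch of that lazy-initialization pattern under simplified assumptions; Source, the string "reader", and the plain bool flag are illustrative stand-ins, not the real classes (the real code keeps an atomic flag).

```cpp
#include <functional>
#include <iostream>
#include <optional>
#include <string>

class Source
{
public:
    /// The key condition is injected after construction, before the first generate().
    void setKeyCondition(std::string condition) { key_condition = std::move(condition); }

    /// generate() is the first point where it is safe to build the reader,
    /// so the reader is created on the first call instead of in the constructor.
    std::string generate()
    {
        lazyInitialize();
        return reader;
    }

private:
    void lazyInitialize()
    {
        if (initialized)
            return;
        /// The (stand-in) createReader step reads key_condition, hence the deferral.
        reader = "reader(filter=" + key_condition.value_or("<none>") + ")";
        initialized = true;
    }

    std::optional<std::string> key_condition;
    std::string reader;
    bool initialized = false;   /// simplified; an atomic flag in the real sources
};

int main()
{
    Source source;
    source.setKeyCondition("a > 9000000");   /// set after construction, before first generate()
    std::cout << source.generate() << '\n';  /// prints "reader(filter=a > 9000000)"
}
```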
diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py index 26962236869..19c22201fb0 100644 --- a/tests/integration/test_broken_part_during_merge/test.py +++ b/tests/integration/test_broken_part_during_merge/test.py @@ -24,7 +24,7 @@ def test_merge_and_part_corruption(started_cluster): node1.query( """ CREATE TABLE replicated_mt(date Date, id UInt32, value Int32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') ORDER BY id + ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') ORDER BY id SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0; """.format( replica=node1.name @@ -59,7 +59,8 @@ def test_merge_and_part_corruption(started_cluster): # corrupt part after merge already assigned, but not started res_opt = p.apply_async(optimize_with_delay, (1,)) node1.query( - "CHECK TABLE replicated_mt", settings={"check_query_single_value_result": 0} + "CHECK TABLE replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) # start merge node1.query("SYSTEM START REPLICATION QUEUES replicated_mt") diff --git a/tests/integration/test_check_table/test.py b/tests/integration/test_check_table/test.py index 99a5846d4ee..17e4ee92e26 100644 --- a/tests/integration/test_check_table/test.py +++ b/tests/integration/test_check_table/test.py @@ -1,6 +1,7 @@ import pytest from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) @@ -78,7 +79,7 @@ def test_check_normal_table_corruption(started_cluster): assert ( node1.query( "CHECK TABLE non_replicated_mt PARTITION 201902", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "201902_1_1_0\t1\t\n" ) @@ -88,7 +89,7 @@ def test_check_normal_table_corruption(started_cluster): assert ( node1.query( "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ).strip() == "201902_1_1_0\t1\tChecksums recounted and written to disk." ) @@ -100,7 +101,7 @@ def test_check_normal_table_corruption(started_cluster): assert ( node1.query( "CHECK TABLE non_replicated_mt PARTITION 201902", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ).strip() == "201902_1_1_0\t1\tChecksums recounted and written to disk." 
) @@ -111,12 +112,12 @@ def test_check_normal_table_corruption(started_cluster): assert node1.query( "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"] assert node1.query( "CHECK TABLE non_replicated_mt", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"] node1.query( @@ -126,7 +127,7 @@ def test_check_normal_table_corruption(started_cluster): assert ( node1.query( "CHECK TABLE non_replicated_mt PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "201901_2_2_0\t1\t\n" ) @@ -137,7 +138,7 @@ def test_check_normal_table_corruption(started_cluster): assert node1.query( "CHECK TABLE non_replicated_mt PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ).strip().split("\t")[0:2] == ["201901_2_2_0", "0"] @@ -164,13 +165,15 @@ def test_check_replicated_table_simple(started_cluster): assert ( node1.query( - "CHECK TABLE replicated_mt", settings={"check_query_single_value_result": 0} + "CHECK TABLE replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "201902_0_0_0\t1\t\n" ) assert ( node2.query( - "CHECK TABLE replicated_mt", settings={"check_query_single_value_result": 0} + "CHECK TABLE replicated_mt", + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "201902_0_0_0\t1\t\n" ) @@ -185,18 +188,40 @@ def test_check_replicated_table_simple(started_cluster): assert ( node1.query( "CHECK TABLE replicated_mt PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "201901_0_0_0\t1\t\n" ) assert ( node2.query( "CHECK TABLE replicated_mt PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "201901_0_0_0\t1\t\n" ) + assert sorted( + node2.query( + "CHECK TABLE replicated_mt", + settings={"check_query_single_value_result": 0}, + ).split("\n") + ) == ["", "201901_0_0_0\t1\t", "201902_0_0_0\t1\t"] + + with pytest.raises(QueryRuntimeException) as exc: + node2.query( + "CHECK TABLE replicated_mt PART '201801_0_0_0'", + settings={"check_query_single_value_result": 0}, + ) + assert "NO_SUCH_DATA_PART" in str(exc.value) + + assert ( + node2.query( + "CHECK TABLE replicated_mt PART '201902_0_0_0'", + settings={"check_query_single_value_result": 0}, + ) + == "201902_0_0_0\t1\t\n" + ) + def test_check_replicated_table_corruption(started_cluster): for node in [node1, node2]: @@ -229,7 +254,7 @@ def test_check_replicated_table_corruption(started_cluster): corrupt_data_part_on_disk(node1, "replicated_mt_1", part_name) assert node1.query( "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "{p}\t0\tPart {p} looks broken. 
Removing it and will try to fetch.\n".format( p=part_name ) @@ -237,14 +262,14 @@ node1.query_with_retry("SYSTEM SYNC REPLICA replicated_mt_1") assert node1.query( "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "{}\t1\t\n".format(part_name) assert node1.query("SELECT count() from replicated_mt_1") == "4\n" remove_part_from_disk(node2, "replicated_mt_1", part_name) assert node2.query( "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "{p}\t0\tPart {p} looks broken. Removing it and will try to fetch.\n".format( p=part_name ) @@ -252,6 +277,6 @@ node1.query("SYSTEM SYNC REPLICA replicated_mt_1") assert node1.query( "CHECK TABLE replicated_mt_1 PARTITION 201901", - settings={"check_query_single_value_result": 0}, + settings={"check_query_single_value_result": 0, "max_threads": 1}, ) == "{}\t1\t\n".format(part_name) assert node1.query("SELECT count() from replicated_mt_1") == "4\n" diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py index 1b347f6271f..f6ac42a2db2 100644 --- a/tests/integration/test_http_handlers_config/test.py +++ b/tests/integration/test_http_handlers_config/test.py @@ -159,6 +159,13 @@ def test_predefined_query_handler(): assert cluster.instance.query("SELECT * FROM test_table") == "100\tTEST\n" cluster.instance.query("DROP TABLE test_table") + res4 = cluster.instance.http_request( + "test_predefined_handler_get?max_threads=1&param_setting_name=max_threads", + method="GET", + headers={"XXX": "xxx"}, + ) + assert b"max_threads\t1\n" == res4.content + def test_fixed_static_handler(): with contextlib.closing( @@ -441,6 +448,55 @@ def test_defaults_http_handlers(): == cluster.instance.http_request("?query=SELECT+1", method="GET").content ) + assert ( + 404 + == cluster.instance.http_request( + "/nonexistent?query=SELECT+1", method="GET" + ).status_code + ) + + +def test_defaults_http_handlers_config_order(): + def check_predefined_query_handler(): + assert ( + 200 + == cluster.instance.http_request( + "?query=SELECT+1", method="GET" + ).status_code + ) + assert ( + b"1\n" + == cluster.instance.http_request("?query=SELECT+1", method="GET").content + ) + response = cluster.instance.http_request( + "test_predefined_handler_get?max_threads=1&setting_name=max_threads", + method="GET", + headers={"XXX": "xxx"}, + ) + assert b"max_threads\t1\n" == response.content + assert ( + "text/tab-separated-values; charset=UTF-8" + == response.headers["content-type"] + ) + + with contextlib.closing( + SimpleCluster( + ClickHouseCluster(__file__), + "defaults_handlers_config_order_first", + "test_defaults_handlers_config_order/defaults_first", + ) + ) as cluster: + check_predefined_query_handler() + + with contextlib.closing( + SimpleCluster( + ClickHouseCluster(__file__), + "defaults_handlers_config_order_first", + "test_defaults_handlers_config_order/defaults_last", + ) + ) as cluster: + check_predefined_query_handler() + def test_prometheus_handler(): with contextlib.closing( diff --git a/tests/integration/test_http_handlers_config/test_defaults_handlers_config_order/defaults_first/config.xml
b/tests/integration/test_http_handlers_config/test_defaults_handlers_config_order/defaults_first/config.xml new file mode 100644 index 00000000000..d07f18cce7b --- /dev/null +++ b/tests/integration/test_http_handlers_config/test_defaults_handlers_config_order/defaults_first/config.xml @@ -0,0 +1,17 @@ + + + Default server response + + + + + GET + xxx + /test_predefined_handler_get + + predefined_query_handler + SELECT name, value FROM system.settings WHERE name = {setting_name:String} + + + + diff --git a/tests/integration/test_http_handlers_config/test_defaults_handlers_config_order/defaults_last/config.xml b/tests/integration/test_http_handlers_config/test_defaults_handlers_config_order/defaults_last/config.xml new file mode 100644 index 00000000000..24340e4de25 --- /dev/null +++ b/tests/integration/test_http_handlers_config/test_defaults_handlers_config_order/defaults_last/config.xml @@ -0,0 +1,17 @@ + + + Default server response + + + + GET + xxx + /test_predefined_handler_get + + predefined_query_handler + SELECT name, value FROM system.settings WHERE name = {setting_name:String} + + + + + diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 92b5b95dc50..9d7a46001e7 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -218,6 +218,18 @@ def test_quoted_argument_parsing(client: KeeperClient): assert client.get(node_path) == "value4 with some whitespace" +def get_direct_children_number(client: KeeperClient): + client.touch("/get_direct_children_number") + client.touch("/get_direct_children_number/1") + client.touch("/get_direct_children_number/1/1") + client.touch("/get_direct_children_number/1/2") + client.touch("/get_direct_children_number/2") + client.touch("/get_direct_children_number/2/1") + client.touch("/get_direct_children_number/2/2") + + assert client.get_direct_children_number("/get_direct_children_number") == "2" + + def test_get_all_children_number(client: KeeperClient): client.touch("/test_get_all_children_number") client.touch("/test_get_all_children_number/1") diff --git a/tests/queries/0_stateless/02876_experimental_partial_result.reference b/tests/integration/test_replicated_database_cluster_groups/__init__.py similarity index 100% rename from tests/queries/0_stateless/02876_experimental_partial_result.reference rename to tests/integration/test_replicated_database_cluster_groups/__init__.py diff --git a/tests/integration/test_replicated_database_cluster_groups/configs/backup_group.xml b/tests/integration/test_replicated_database_cluster_groups/configs/backup_group.xml new file mode 100644 index 00000000000..3df343bbc9e --- /dev/null +++ b/tests/integration/test_replicated_database_cluster_groups/configs/backup_group.xml @@ -0,0 +1,3 @@ + + backups + diff --git a/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml b/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml new file mode 100644 index 00000000000..5666ffeace8 --- /dev/null +++ b/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml @@ -0,0 +1,14 @@ + + + + 1 + 1 + 1 + + + + + default + + + diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py new file mode 100644 index 00000000000..b14581c1fe6 --- /dev/null +++ b/tests/integration/test_replicated_database_cluster_groups/test.py @@ -0,0 +1,129 @@ +import re +import pytest + +from 
helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +main_node_1 = cluster.add_instance( + "main_node_1", + user_configs=["configs/settings.xml"], + with_zookeeper=True, + stay_alive=True, + macros={"shard": 1, "replica": 1}, +) +main_node_2 = cluster.add_instance( + "main_node_2", + user_configs=["configs/settings.xml"], + with_zookeeper=True, + stay_alive=True, + macros={"shard": 1, "replica": 2}, +) +backup_node_1 = cluster.add_instance( + "backup_node_1", + main_configs=["configs/backup_group.xml"], + user_configs=["configs/settings.xml"], + with_zookeeper=True, + stay_alive=True, + macros={"shard": 1, "replica": 3}, +) +backup_node_2 = cluster.add_instance( + "backup_node_2", + main_configs=["configs/backup_group.xml"], + user_configs=["configs/settings.xml"], + with_zookeeper=True, + stay_alive=True, + macros={"shard": 1, "replica": 4}, +) + +all_nodes = [ + main_node_1, + main_node_2, + backup_node_1, + backup_node_2, +] + +uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") + + +def assert_create_query(nodes, table_name, expected): + replace_uuid = lambda x: re.sub(uuid_regex, "uuid", x) + query = "show create table {}".format(table_name) + for node in nodes: + assert_eq_with_retry(node, query, expected, get_result=replace_uuid) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_cluster_groups(started_cluster): + for node in all_nodes: + node.query( + f"CREATE DATABASE cluster_groups ENGINE = Replicated('/test/cluster_groups', '{node.macros['shard']}', '{node.macros['replica']}');" + ) + + # 1. system.clusters + + cluster_query = "SELECT host_name from system.clusters WHERE cluster = 'cluster_groups' ORDER BY host_name" + expected_main = "main_node_1\nmain_node_2\n" + expected_backup = "backup_node_1\nbackup_node_2\n" + + for node in [main_node_1, main_node_2]: + assert_eq_with_retry(node, cluster_query, expected_main) + + for node in [backup_node_1, backup_node_2]: + assert_eq_with_retry(node, cluster_query, expected_backup) + + # 2. Query execution depends only on your cluster group + + backup_node_1.stop_clickhouse() + backup_node_2.stop_clickhouse() + + # OK + main_node_1.query( + "CREATE TABLE cluster_groups.table_1 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);" + ) + + # Exception + main_node_2.stop_clickhouse() + settings = {"distributed_ddl_task_timeout": 5} + assert ( + "There are 1 unfinished hosts (0 of them are currently active)" + in main_node_1.query_and_get_error( + "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);", + settings=settings, + ) + ) + + # 3. 
After start both groups are synced + + backup_node_1.start_clickhouse() + backup_node_2.start_clickhouse() + main_node_2.start_clickhouse() + + expected_1 = "CREATE TABLE cluster_groups.table_1\\n(\\n `d` Date,\\n `k` UInt64\\n)\\nENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nPARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192" + expected_2 = "CREATE TABLE cluster_groups.table_2\\n(\\n `d` Date,\\n `k` UInt64\\n)\\nENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/{uuid}/{shard}\\', \\'{replica}\\')\\nPARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192" + + assert_create_query(all_nodes, "cluster_groups.table_1", expected_1) + assert_create_query(all_nodes, "cluster_groups.table_2", expected_2) + + # 4. SYSTEM DROP DATABASE REPLICA + backup_node_2.stop_clickhouse() + backup_node_1.query( + "SYSTEM DROP DATABASE REPLICA '1|4' FROM DATABASE cluster_groups" + ) + + assert_eq_with_retry(backup_node_1, cluster_query, "backup_node_1\n") + + main_node_2.stop_clickhouse() + main_node_1.query("SYSTEM DROP DATABASE REPLICA '1|2' FROM DATABASE cluster_groups") + + assert_eq_with_retry(main_node_1, cluster_query, "main_node_1\n") diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index 89e711745b4..11198a7175b 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -41,6 +41,10 @@ def get_spark(): .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") .config("spark.sql.catalog.spark_catalog.type", "hadoop") .config("spark.sql.catalog.spark_catalog.warehouse", "/iceberg_data") + .config( + "spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", + ) .master("local") ) return builder.master("local").getOrCreate() @@ -129,12 +133,12 @@ def generate_data(spark, start, end): return df -def create_iceberg_table(node, table_name): +def create_iceberg_table(node, table_name, format="Parquet"): node.query( f""" DROP TABLE IF EXISTS {table_name}; CREATE TABLE {table_name} - ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/')""" + ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/', format={format})""" ) @@ -165,7 +169,7 @@ def test_single_iceberg_file(started_cluster, format_version): bucket = started_cluster.minio_bucket TABLE_NAME = "test_single_iceberg_file_" + format_version - inserted_data = "SELECT number, toString(number) FROM numbers(100)" + inserted_data = "SELECT number, toString(number) as string FROM numbers(100)" parquet_data_path = create_initial_data_file( started_cluster, instance, inserted_data, TABLE_NAME ) @@ -308,7 +312,7 @@ def test_types(started_cluster, format_version): [ ["a", "Nullable(Int32)"], ["b", "Nullable(String)"], - ["c", "Nullable(Date32)"], + ["c", "Nullable(Date)"], ["d", "Array(Nullable(String))"], ["e", "Nullable(Bool)"], ] @@ -367,3 +371,147 @@ def test_delete_files(started_cluster, format_version): ) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 50 + + +@pytest.mark.parametrize("format_version", ["1", "2"]) +def test_evolved_schema(started_cluster, format_version): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_evolved_schema_" + format_version + + write_iceberg_from_df( + spark, + generate_data(spark, 0, 100), + 
TABLE_NAME, + mode="overwrite", + format_version=format_version, + ) + + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + create_iceberg_table(instance, TABLE_NAME) + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + spark.sql(f"ALTER TABLE {TABLE_NAME} ADD COLUMNS (x bigint)") + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + error = instance.query_and_get_error(f"SELECT * FROM {TABLE_NAME}") + assert "UNSUPPORTED_METHOD" in error + + +def test_row_based_deletes(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_row_based_deletes" + + spark.sql( + f"CREATE TABLE {TABLE_NAME} (id bigint, data string) USING iceberg TBLPROPERTIES ('format-version' = '2', 'write.update.mode'='merge-on-read', 'write.delete.mode'='merge-on-read', 'write.merge.mode'='merge-on-read')" + ) + spark.sql( + f"INSERT INTO {TABLE_NAME} select id, char(id + ascii('a')) from range(100)" + ) + + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + create_iceberg_table(instance, TABLE_NAME) + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + spark.sql(f"DELETE FROM {TABLE_NAME} WHERE id < 10") + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + error = instance.query_and_get_error(f"SELECT * FROM {TABLE_NAME}") + assert "UNSUPPORTED_METHOD" in error + + +@pytest.mark.parametrize("format_version", ["1", "2"]) +def test_schema_inference(started_cluster, format_version): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + for format in ["Parquet", "ORC", "Avro"]: + TABLE_NAME = "test_schema_inference_" + format + "_" + format_version + + # Types time, timestamptz, fixed are not supported in Spark. 
+ spark.sql( + f"CREATE TABLE {TABLE_NAME} (intC int, longC long, floatC float, doubleC double, decimalC1 decimal(10, 3), decimalC2 decimal(20, 10), decimalC3 decimal(38, 30), dateC date, timestampC timestamp, stringC string, binaryC binary, arrayC1 array, mapC1 map, structC1 struct, complexC array>>, field2: struct>>) USING iceberg TBLPROPERTIES ('format-version' = '{format_version}', 'write.format.default' = '{format}')" + ) + + spark.sql( + f"insert into {TABLE_NAME} select 42, 4242, 42.42, 4242.4242, decimal(42.42), decimal(42.42), decimal(42.42), date('2020-01-01'), timestamp('2020-01-01 20:00:00'), 'hello', binary('hello'), array(1,2,3), map('key', 'value'), struct(42, 'hello'), array(struct(map('key', array(map('key', 42))), struct(42, 'hello')))" + ) + + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + create_iceberg_table(instance, TABLE_NAME, format) + + res = instance.query(f"DESC {TABLE_NAME} FORMAT TSVRaw") + expected = TSV( + [ + ["intC", "Nullable(Int32)"], + ["longC", "Nullable(Int64)"], + ["floatC", "Nullable(Float32)"], + ["doubleC", "Nullable(Float64)"], + ["decimalC1", "Nullable(Decimal(10, 3))"], + ["decimalC2", "Nullable(Decimal(20, 10))"], + ["decimalC3", "Nullable(Decimal(38, 30))"], + ["dateC", "Nullable(Date)"], + ["timestampC", "Nullable(DateTime64(6, 'UTC'))"], + ["stringC", "Nullable(String)"], + ["binaryC", "Nullable(String)"], + ["arrayC1", "Array(Nullable(Int32))"], + ["mapC1", "Map(String, Nullable(String))"], + ["structC1", "Tuple(field1 Nullable(Int32), field2 Nullable(String))"], + [ + "complexC", + "Array(Tuple(field1 Map(String, Array(Map(String, Nullable(Int32)))), field2 Tuple(field3 Nullable(Int32), field4 Nullable(String))))", + ], + ] + ) + + assert res == expected + + # Check that we can parse data + instance.query(f"SELECT * FROM {TABLE_NAME}") + + +@pytest.mark.parametrize("format_version", ["1", "2"]) +def test_metadata_file_selection(started_cluster, format_version): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + TABLE_NAME = "test_metadata_selection_" + format_version + + spark.sql( + f"CREATE TABLE {TABLE_NAME} (id bigint, data string) USING iceberg TBLPROPERTIES ('format-version' = '2', 'write.update.mode'='merge-on-read', 'write.delete.mode'='merge-on-read', 'write.merge.mode'='merge-on-read')" + ) + + for i in range(50): + spark.sql( + f"INSERT INTO {TABLE_NAME} select id, char(id + ascii('a')) from range(10)" + ) + + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + create_iceberg_table(instance, TABLE_NAME) + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 500 diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 079311422cc..29757870f9c 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -159,7 +159,7 @@ def generate_random_files( values_csv = ( "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" ).encode() - print(f"File {filename}, content: {total_values}") + print(f"File {filename}, content: {rand_values}") put_s3_file_content(started_cluster, filename, values_csv) return total_values diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py index 8a3081e0c15..84f75d66089 
100644 --- a/tests/integration/test_system_start_stop_listen/test.py +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -30,13 +30,10 @@ def started_cluster(): def http_works(port=8123): try: - response = requests.post(f"http://{main_node.ip_address}:{port}/ping") - if response.status_code == 400: - return True - except: - pass - - return False + response = requests.get(f"http://{main_node.ip_address}:{port}/ping") + return response.status_code == 200 + except requests.exceptions.ConnectionError: + return False def assert_everything_works(): diff --git a/tests/performance/array_fold.xml b/tests/performance/array_fold.xml index fae8bd164a7..32bd45beb1e 100644 --- a/tests/performance/array_fold.xml +++ b/tests/performance/array_fold.xml @@ -1,5 +1,5 @@ - SELECT arrayFold((x, acc) -> acc + x, range(number % 100), toUInt64(0)) from numbers(100000) Format Null - SELECT arrayFold((x, acc) -> acc + 1, range(number % 100), toUInt64(0)) from numbers(100000) Format Null - SELECT arrayFold((x, acc) -> acc + x, range(number), toUInt64(0)) from numbers(10000) Format Null + SELECT arrayFold((acc, x) -> acc + x, range(number % 100), toUInt64(0)) from numbers(100000) Format Null + SELECT arrayFold((acc, x) -> acc + 1, range(number % 100), toUInt64(0)) from numbers(100000) Format Null + SELECT arrayFold((acc, x) -> acc + x, range(number), toUInt64(0)) from numbers(10000) Format Null diff --git a/tests/performance/enum_in_set.xml b/tests/performance/enum_in_set.xml new file mode 100644 index 00000000000..bfd9742a206 --- /dev/null +++ b/tests/performance/enum_in_set.xml @@ -0,0 +1,18 @@ + + + + CREATE TABLE iso_3166_1_alpha_2 + ( + `c` Enum8('LI' = -128, 'LT' = -127, 'LU' = -126, 'MO' = -125, 'MK' = -124, 'MG' = -123, 'MW' = -122, 'MY' = -121, 'MV' = -120, 'ML' = -119, 'MT' = -118, 'MH' = -117, 'MQ' = -116, 'MR' = -115, 'MU' = -114, 'YT' = -113, 'MX' = -112, 'FM' = -111, 'MD' = -110, 'MC' = -109, 'MN' = -108, 'ME' = -107, 'MS' = -106, 'MA' = -105, 'MZ' = -104, 'MM' = -103, 'NA' = -102, 'NR' = -101, 'NP' = -100, 'NL' = -99, 'NC' = -98, 'NZ' = -97, 'NI' = -96, 'NE' = -95, 'NG' = -94, 'NU' = -93, 'NF' = -92, 'MP' = -91, 'NO' = -90, 'OM' = -89, 'PK' = -88, 'PW' = -87, 'PS' = -86, 'PA' = -85, 'PG' = -84, 'PY' = -83, 'PE' = -82, 'PH' = -81, 'PN' = -80, 'PL' = -79, 'PT' = -78, 'PR' = -77, 'QA' = -76, 'RE' = -75, 'RO' = -74, 'RU' = -73, 'RW' = -72, 'BL' = -71, 'SH' = -70, 'KN' = -69, 'LC' = -68, 'MF' = -67, 'PM' = -66, 'VC' = -65, 'WS' = -64, 'SM' = -63, 'ST' = -62, 'SA' = -61, 'SN' = -60, 'RS' = -59, 'SC' = -58, 'SL' = -57, 'SG' = -56, 'SX' = -55, 'SK' = -54, 'SI' = -53, 'SB' = -52, 'SO' = -51, 'ZA' = -50, 'GS' = -49, 'SS' = -48, 'ES' = -47, 'LK' = -46, 'SD' = -45, 'SR' = -44, 'SJ' = -43, 'SZ' = -42, 'SE' = -41, 'CH' = -40, 'SY' = -39, 'TW' = -38, 'TJ' = -37, 'TZ' = -36, 'TH' = -35, 'TL' = -34, 'TG' = -33, 'TK' = -32, 'TO' = -31, 'TT' = -30, 'TN' = -29, 'TR' = -28, 'TM' = -27, 'TC' = -26, 'TV' = -25, 'UG' = -24, 'UA' = -23, 'AE' = -22, 'GB' = -21, 'UM' = -20, 'US' = -19, 'UY' = -18, 'UZ' = -17, 'VU' = -16, 'VE' = -15, 'VN' = -14, 'VG' = -13, 'VI' = -12, 'WF' = -11, 'EH' = -10, 'YE' = -9, 'ZM' = -8, 'ZW' = -7, 'OTHER' = 0, 'AF' = 1, 'AX' = 2, 'AL' = 3, 'DZ' = 4, 'AS' = 5, 'AD' = 6, 'AO' = 7, 'AI' = 8, 'AQ' = 9, 'AG' = 10, 'AR' = 11, 'AM' = 12, 'AW' = 13, 'AU' = 14, 'AT' = 15, 'AZ' = 16, 'BS' = 17, 'BH' = 18, 'BD' = 19, 'BB' = 20, 'BY' = 21, 'BE' = 22, 'BZ' = 23, 'BJ' = 24, 'BM' = 25, 'BT' = 26, 'BO' = 27, 'BQ' = 28, 'BA' = 29, 'BW' = 30, 'BV' = 31, 'BR' = 32, 'IO' = 33, 'BN' = 34, 'BG' = 35, 'BF' = 36, 
'BI' = 37, 'CV' = 38, 'KH' = 39, 'CM' = 40, 'CA' = 41, 'KY' = 42, 'CF' = 43, 'TD' = 44, 'CL' = 45, 'CN' = 46, 'CX' = 47, 'CC' = 48, 'CO' = 49, 'KM' = 50, 'CD' = 51, 'CG' = 52, 'CK' = 53, 'CR' = 54, 'CI' = 55, 'HR' = 56, 'CU' = 57, 'CW' = 58, 'CY' = 59, 'CZ' = 60, 'DK' = 61, 'DJ' = 62, 'DM' = 63, 'DO' = 64, 'EC' = 65, 'EG' = 66, 'SV' = 67, 'GQ' = 68, 'ER' = 69, 'EE' = 70, 'ET' = 71, 'FK' = 72, 'FO' = 73, 'FJ' = 74, 'FI' = 75, 'FR' = 76, 'GF' = 77, 'PF' = 78, 'TF' = 79, 'GA' = 80, 'GM' = 81, 'GE' = 82, 'DE' = 83, 'GH' = 84, 'GI' = 85, 'GR' = 86, 'GL' = 87, 'GD' = 88, 'GP' = 89, 'GU' = 90, 'GT' = 91, 'GG' = 92, 'GN' = 93, 'GW' = 94, 'GY' = 95, 'HT' = 96, 'HM' = 97, 'VA' = 98, 'HN' = 99, 'HK' = 100, 'HU' = 101, 'IS' = 102, 'IN' = 103, 'ID' = 104, 'IR' = 105, 'IQ' = 106, 'IE' = 107, 'IM' = 108, 'IL' = 109, 'IT' = 110, 'JM' = 111, 'JP' = 112, 'JE' = 113, 'JO' = 114, 'KZ' = 115, 'KE' = 116, 'KI' = 117, 'KP' = 118, 'KR' = 119, 'KW' = 120, 'KG' = 121, 'LA' = 122, 'LV' = 123, 'LB' = 124, 'LS' = 125, 'LR' = 126, 'LY' = 127) + ) + ENGINE = MergeTree + ORDER BY tuple() + SETTINGS index_granularity = 8192 + + INSERT INTO iso_3166_1_alpha_2 SELECT (rand(number) % 256) - 128 FROM numbers(200000000) + OPTIMIZE TABLE iso_3166_1_alpha_2 FINAL + + SELECT count() FROM iso_3166_1_alpha_2 WHERE c NOT IN ('CU', 'BN', 'VI', 'US', 'AQ', 'AG', 'AR', 'AM', 'AW', 'AU', 'AT', 'AZ', 'BS', 'BH', 'BD', 'BB', 'BY', 'BE') FORMAT Null SETTINGS max_threads = 1 + + DROP TABLE IF EXISTS iso_3166_1_alpha_2 + diff --git a/tests/performance/merge_tree_insert.xml b/tests/performance/merge_tree_insert.xml index 1e987d27d50..3e1d2541480 100644 --- a/tests/performance/merge_tree_insert.xml +++ b/tests/performance/merge_tree_insert.xml @@ -18,15 +18,29 @@ merge_tree_insert_6 + + + decimal_primary_key_table_name + + merge_tree_insert_7 + merge_tree_insert_8 + merge_tree_insert_9 + + CREATE TABLE merge_tree_insert_1 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1) CREATE TABLE merge_tree_insert_2 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2) CREATE TABLE merge_tree_insert_3 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + CREATE TABLE merge_tree_insert_4 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1) CREATE TABLE merge_tree_insert_5 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2) CREATE TABLE merge_tree_insert_6 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + CREATE TABLE merge_tree_insert_7 (value_1 Decimal64(8), value_2 Decimal64(8), value_3 Decimal64(8)) ENGINE = MergeTree ORDER BY (value_1) + CREATE TABLE merge_tree_insert_8 (value_1 Decimal64(8), value_2 Decimal64(8), value_3 Decimal64(8)) ENGINE = MergeTree ORDER BY (value_1, value_2) + CREATE TABLE merge_tree_insert_9 (value_1 Decimal64(8), value_2 Decimal64(8), value_3 Decimal64(8)) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 500000 INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1000000 INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1500000 @@ -35,7 +49,12 @@ INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), 
toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1000000 INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1500000 + INSERT INTO {decimal_primary_key_table_name} SELECT rand64(0) % 1000000, rand64(1) % 1500000, rand64(2) % 2000000 FROM system.numbers LIMIT 500000 + INSERT INTO {decimal_primary_key_table_name} SELECT rand64(0) % 1000000, rand64(1) % 1500000, rand64(2) % 2000000 FROM system.numbers LIMIT 1000000 + INSERT INTO {decimal_primary_key_table_name} SELECT rand64(0) % 1000000, rand64(1) % 1500000, rand64(2) % 2000000 FROM system.numbers LIMIT 1500000 + DROP TABLE IF EXISTS {integer_primary_key_table_name} DROP TABLE IF EXISTS {string_primary_key_table_name} + DROP TABLE IF EXISTS {decimal_primary_key_table_name} diff --git a/tests/performance/orc_filter_push_down.xml b/tests/performance/orc_filter_push_down.xml new file mode 100644 index 00000000000..9f49c20a075 --- /dev/null +++ b/tests/performance/orc_filter_push_down.xml @@ -0,0 +1,26 @@ + + + 1 + 10000 + + + + create table test_orc_fpd + ( + a Nullable(Int64), + b Nullable(String) + ) Engine=File(ORC) + + + + insert into test_orc_fpd select number as a, cast(number as String) as b from numbers(10000000) + + + DROP TABLE IF EXISTS test_orc_fpd + + + select a % 10, length(b) % 10, count(1) from test_orc_fpd where a > 9000000 group by a % 10, length(b) % 10 + + + select a % 10, length(b) % 10, count(1) from test_orc_fpd where a in (9000000, 1000) group by a % 10, length(b) % 10 + \ No newline at end of file diff --git a/tests/performance/sort_patterns.xml b/tests/performance/sort_patterns.xml new file mode 100644 index 00000000000..fc49b20cc8c --- /dev/null +++ b/tests/performance/sort_patterns.xml @@ -0,0 +1,28 @@ + + + + integer_type + + UInt32 + UInt64 + + + + sort_expression + + key + key, value + key DESC + key DESC, value DESC + + + + + CREATE TABLE sequential_{integer_type} (key {integer_type}, value {integer_type}) Engine = Memory + + INSERT INTO sequential_{integer_type} SELECT number, number FROM numbers(500000000) + + SELECT key, value FROM sequential_{integer_type} ORDER BY {sort_expression} FORMAT Null + + DROP TABLE IF EXISTS sequential_{integer_type} + diff --git a/tests/queries/0_stateless/00063_check_query.reference b/tests/queries/0_stateless/00063_check_query.reference index 6ed281c757a..e8183f05f5d 100644 --- a/tests/queries/0_stateless/00063_check_query.reference +++ b/tests/queries/0_stateless/00063_check_query.reference @@ -1,2 +1,3 @@ 1 1 +1 diff --git a/tests/queries/0_stateless/00063_check_query.sql b/tests/queries/0_stateless/00063_check_query.sql index e7362074a05..90711943150 100644 --- a/tests/queries/0_stateless/00063_check_query.sql +++ b/tests/queries/0_stateless/00063_check_query.sql @@ -8,6 +8,12 @@ INSERT INTO check_query_tiny_log VALUES (1, 'A'), (2, 'B'), (3, 'C'); CHECK TABLE check_query_tiny_log; +CHECK TABLE check_query_tiny_log PARTITION tuple(); -- { serverError NOT_IMPLEMENTED } +CHECK TABLE check_query_tiny_log PART 'all_0_0_0'; -- { serverError NOT_IMPLEMENTED } + +-- Settings and FORMAT are supported +CHECK TABLE check_query_tiny_log SETTINGS max_threads = 16; +CHECK TABLE check_query_tiny_log FORMAT Null SETTINGS max_threads = 8, check_query_single_value_result = 0; DROP TABLE IF EXISTS check_query_log; diff --git a/tests/queries/0_stateless/00419_show_sql_queries.sh b/tests/queries/0_stateless/00419_show_sql_queries.sh index 99252eeb1ba..2b7f73932e2 100755 --- 
a/tests/queries/0_stateless/00419_show_sql_queries.sh +++ b/tests/queries/0_stateless/00419_show_sql_queries.sh @@ -9,3 +9,4 @@ $CLICKHOUSE_CLIENT -q "SHOW DATABASES" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW TABLES" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW ENGINES" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW FUNCTIONS" &>/dev/null +$CLICKHOUSE_CLIENT -q "SHOW MERGES" &>/dev/null diff --git a/tests/queries/0_stateless/00961_check_table.reference b/tests/queries/0_stateless/00961_check_table.reference index d85c66db622..a0a054898b9 100644 --- a/tests/queries/0_stateless/00961_check_table.reference +++ b/tests/queries/0_stateless/00961_check_table.reference @@ -1,11 +1,17 @@ -201901_1_1_0 1 -======== -201901_1_1_0 1 +201801_1_1_0 1 201901_2_2_0 1 ======== -201901_1_2_1 1 +201801_1_1_0 1 +201901_2_2_0 1 +201901_3_3_0 1 ======== -201901_1_2_1 1 -201902_3_3_0 1 +201801_1_1_1 1 +201901_2_3_1 1 ======== -201902_3_4_1 1 +201801_1_1_1 1 +201901_2_3_1 1 +201902_4_4_0 1 +======== +201902_4_5_1 1 +======== +201801_1_1_0 1 diff --git a/tests/queries/0_stateless/00961_check_table.sql b/tests/queries/0_stateless/00961_check_table.sql index 0e0b2c3b483..a6abe8103d5 100644 --- a/tests/queries/0_stateless/00961_check_table.sql +++ b/tests/queries/0_stateless/00961_check_table.sql @@ -3,29 +3,31 @@ DROP TABLE IF EXISTS mt_table; CREATE TABLE mt_table (d Date, key UInt64, data String) ENGINE = MergeTree() PARTITION BY toYYYYMM(d) ORDER BY key; -CHECK TABLE mt_table; +CHECK TABLE mt_table SETTINGS max_threads = 1; + +INSERT INTO mt_table VALUES (toDate('2018-01-01'), 1, 'old'); INSERT INTO mt_table VALUES (toDate('2019-01-02'), 1, 'Hello'), (toDate('2019-01-02'), 2, 'World'); -CHECK TABLE mt_table; +CHECK TABLE mt_table SETTINGS max_threads = 1; INSERT INTO mt_table VALUES (toDate('2019-01-02'), 3, 'quick'), (toDate('2019-01-02'), 4, 'brown'); SELECT '========'; -CHECK TABLE mt_table; +CHECK TABLE mt_table SETTINGS max_threads = 1; OPTIMIZE TABLE mt_table FINAL; SELECT '========'; -CHECK TABLE mt_table; +CHECK TABLE mt_table SETTINGS max_threads = 1; SELECT '========'; INSERT INTO mt_table VALUES (toDate('2019-02-03'), 5, '!'), (toDate('2019-02-03'), 6, '?'); -CHECK TABLE mt_table; +CHECK TABLE mt_table SETTINGS max_threads = 1; SELECT '========'; @@ -33,6 +35,10 @@ INSERT INTO mt_table VALUES (toDate('2019-02-03'), 7, 'jump'), (toDate('2019-02- OPTIMIZE TABLE mt_table FINAL; -CHECK TABLE mt_table PARTITION 201902; +CHECK TABLE mt_table PARTITION 201902 SETTINGS max_threads = 1; + +SELECT '========'; + +CHECK TABLE mt_table PART '201801_1_1_0'; DROP TABLE IF EXISTS mt_table; diff --git a/tests/queries/0_stateless/01018_ip_dictionary_long.sql b/tests/queries/0_stateless/01018_ip_dictionary_long.sql index bb7f120163c..43025038f87 100644 --- a/tests/queries/0_stateless/01018_ip_dictionary_long.sql +++ b/tests/queries/0_stateless/01018_ip_dictionary_long.sql @@ -37,7 +37,9 @@ CREATE DICTIONARY {CLICKHOUSE_DATABASE:Identifier}.dict_ipv4_trie PRIMARY KEY prefix SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db currentDatabase() table 'table_ipv4_trie')) LAYOUT(IP_TRIE()) -LIFETIME(MIN 10 MAX 100); +LIFETIME(MIN 10 MAX 100) +SETTINGS(dictionary_use_async_executor=1, max_threads=8) +; -- fuzzer SELECT '127.0.0.0/24' = dictGetString({CLICKHOUSE_DATABASE:String} || '.dict_ipv4_trie', 'prefixprefixprefixprefix', tuple(IPv4StringToNumOrDefault('127.0.0.0127.0.0.0'))); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql 
b/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql index b836f806170..6e539774e4c 100644 --- a/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql +++ b/tests/queries/0_stateless/01037_zookeeper_check_table_empty_pk.sql @@ -10,7 +10,7 @@ CREATE TABLE mt_without_pk (SomeField1 Int64, SomeField2 Double) ENGINE = MergeT INSERT INTO mt_without_pk VALUES (1, 2); -CHECK TABLE mt_without_pk; +CHECK TABLE mt_without_pk SETTINGS max_threads = 1; DROP TABLE IF EXISTS mt_without_pk SYNC; @@ -20,6 +20,6 @@ CREATE TABLE replicated_mt_without_pk (SomeField1 Int64, SomeField2 Double) ENGI INSERT INTO replicated_mt_without_pk VALUES (1, 2); -CHECK TABLE replicated_mt_without_pk; +CHECK TABLE replicated_mt_without_pk SETTINGS max_threads = 1; DROP TABLE IF EXISTS replicated_mt_without_pk SYNC; diff --git a/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql b/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql index b66aff8384d..eccb2d25878 100644 --- a/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql +++ b/tests/queries/0_stateless/01042_check_query_and_last_granule_size.sql @@ -7,11 +7,11 @@ CREATE TABLE check_query_test (SomeKey UInt64, SomeValue String) ENGINE = MergeT -- Rows in this table are short, so granularity will be 8192. INSERT INTO check_query_test SELECT number, toString(number) FROM system.numbers LIMIT 81920; -CHECK TABLE check_query_test; +CHECK TABLE check_query_test SETTINGS max_threads = 1; OPTIMIZE TABLE check_query_test; -CHECK TABLE check_query_test; +CHECK TABLE check_query_test SETTINGS max_threads = 1; DROP TABLE IF EXISTS check_query_test; @@ -21,18 +21,18 @@ CREATE TABLE check_query_test_non_adaptive (SomeKey UInt64, SomeValue String) EN INSERT INTO check_query_test_non_adaptive SELECT number, toString(number) FROM system.numbers LIMIT 81920; -CHECK TABLE check_query_test_non_adaptive; +CHECK TABLE check_query_test_non_adaptive SETTINGS max_threads = 1; OPTIMIZE TABLE check_query_test_non_adaptive; -CHECK TABLE check_query_test_non_adaptive; +CHECK TABLE check_query_test_non_adaptive SETTINGS max_threads = 1; INSERT INTO check_query_test_non_adaptive SELECT number, toString(number) FROM system.numbers LIMIT 77; -CHECK TABLE check_query_test_non_adaptive; +CHECK TABLE check_query_test_non_adaptive SETTINGS max_threads = 1; OPTIMIZE TABLE check_query_test_non_adaptive; -CHECK TABLE check_query_test_non_adaptive; +CHECK TABLE check_query_test_non_adaptive SETTINGS max_threads = 1; DROP TABLE IF EXISTS check_query_test_non_adaptive; diff --git a/tests/queries/0_stateless/01112_check_table_with_index.sql b/tests/queries/0_stateless/01112_check_table_with_index.sql index e9613df7d1a..8b59466926f 100644 --- a/tests/queries/0_stateless/01112_check_table_with_index.sql +++ b/tests/queries/0_stateless/01112_check_table_with_index.sql @@ -10,6 +10,6 @@ CREATE TABLE check_table_with_indices ( INSERT INTO check_table_with_indices VALUES (0, 'test'), (1, 'test2'); -CHECK TABLE check_table_with_indices; +CHECK TABLE check_table_with_indices SETTINGS max_threads = 1; DROP TABLE check_table_with_indices; diff --git a/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql b/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql index 63d9a11daaa..3e859717873 100644 --- a/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql +++ b/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql @@ -5,7 +5,7 @@ WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT materiali WITH '2020-02-05 
14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE DT64 = materialize(S); -- {serverError 43} WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE materialize(S) = DT64; -- {serverError 43} -SELECT * WHERE toDateTime64(123.345, 3) == 'ABCD'; -- {serverError 53} -- invalid DateTime64 string +SELECT * WHERE toDateTime64(123.345, 3) == 'ABCD'; -- {serverError 41} -- invalid DateTime64 string SELECT * WHERE toDateTime64(123.345, 3) == '2020-02-05 14:34:12.33333333333333333333333333333333333333333333333333333333'; SELECT 'in SELECT'; diff --git a/tests/queries/0_stateless/01390_check_table_codec.sql b/tests/queries/0_stateless/01390_check_table_codec.sql index 639d5bea6e4..83a18b4f140 100644 --- a/tests/queries/0_stateless/01390_check_table_codec.sql +++ b/tests/queries/0_stateless/01390_check_table_codec.sql @@ -4,12 +4,12 @@ DROP TABLE IF EXISTS check_codec; CREATE TABLE check_codec(a Int, b Int CODEC(Delta, ZSTD)) ENGINE = MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part = 0; INSERT INTO check_codec SELECT number, number * 2 FROM numbers(1000); -CHECK TABLE check_codec; +CHECK TABLE check_codec SETTINGS max_threads = 1; DROP TABLE check_codec; CREATE TABLE check_codec(a Int, b Int CODEC(Delta, ZSTD)) ENGINE = MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part = '10M'; INSERT INTO check_codec SELECT number, number * 2 FROM numbers(1000); -CHECK TABLE check_codec; +CHECK TABLE check_codec SETTINGS max_threads = 1; DROP TABLE check_codec; diff --git a/tests/queries/0_stateless/01556_accurate_cast_or_null.reference b/tests/queries/0_stateless/01556_accurate_cast_or_null.reference index 31a9c37421e..a2ccd5af868 100644 --- a/tests/queries/0_stateless/01556_accurate_cast_or_null.reference +++ b/tests/queries/0_stateless/01556_accurate_cast_or_null.reference @@ -42,3 +42,21 @@ 2023-05-30 2149-06-06 1970-01-20 +\N +\N +\N +true +false +true +false +true +false +\N +\N +\N +192.0.2.1 +\N +\N +::ffff:192.0.2.1 +2001:db8::1 +\N diff --git a/tests/queries/0_stateless/01556_accurate_cast_or_null.sql b/tests/queries/0_stateless/01556_accurate_cast_or_null.sql index f00f6ef837f..2fb7b1177e6 100644 --- a/tests/queries/0_stateless/01556_accurate_cast_or_null.sql +++ b/tests/queries/0_stateless/01556_accurate_cast_or_null.sql @@ -49,3 +49,24 @@ SELECT accurateCastOrNull('1xxx', 'Date'); SELECT accurateCastOrNull('2023-05-30', 'Date'); SELECT accurateCastOrNull('2180-01-01', 'Date'); SELECT accurateCastOrNull(19, 'Date'); + +select accurateCastOrNull('test', 'Bool'); +select accurateCastOrNull('truex', 'Bool'); +select accurateCastOrNull('xfalse', 'Bool'); +select accurateCastOrNull('true', 'Bool'); +select accurateCastOrNull('false', 'Bool'); +select accurateCastOrNull('1', 'Bool'); +select accurateCastOrNull('0', 'Bool'); +select accurateCastOrNull(1, 'Bool'); +select accurateCastOrNull(0, 'Bool'); + +select accurateCastOrNull('test', 'IPv4'); +select accurateCastOrNull('2001:db8::1', 'IPv4'); +select accurateCastOrNull('::ffff:192.0.2.1', 'IPv4'); +select accurateCastOrNull('192.0.2.1', 'IPv4'); +select accurateCastOrNull('192.0.2.1x', 'IPv4'); + +select accurateCastOrNull('test', 'IPv6'); +select accurateCastOrNull('192.0.2.1', 'IPv6'); +select accurateCastOrNull('2001:db8::1', 'IPv6'); +select accurateCastOrNull('2001:db8::1x', 'IPv6'); diff --git a/tests/queries/0_stateless/01601_accurate_cast.reference b/tests/queries/0_stateless/01601_accurate_cast.reference index 3c6dceb1f16..dbf9666f4cd 100644 --- a/tests/queries/0_stateless/01601_accurate_cast.reference 
+++ b/tests/queries/0_stateless/01601_accurate_cast.reference @@ -10,3 +10,15 @@ 1970-01-01 00:00:19 2023-05-30 1970-01-20 +\N +true +false +true +false +true +false +\N +192.0.2.1 +\N +::ffff:192.0.2.1 +2001:db8::1 diff --git a/tests/queries/0_stateless/01601_accurate_cast.sql b/tests/queries/0_stateless/01601_accurate_cast.sql index 2108e42df05..d2ecede2402 100644 --- a/tests/queries/0_stateless/01601_accurate_cast.sql +++ b/tests/queries/0_stateless/01601_accurate_cast.sql @@ -34,3 +34,27 @@ SELECT accurateCast(0xFFFFFFFF + 1, 'Date'); -- { serverError CANNOT_CONVERT_T SELECT accurateCast('1xxx', 'Date'); -- { serverError CANNOT_PARSE_DATE } SELECT accurateCast('2023-05-30', 'Date'); SELECT accurateCast(19, 'Date'); + +select accurateCast('test', 'Nullable(Bool)'); +select accurateCast('test', 'Bool'); -- { serverError CANNOT_PARSE_BOOL } +select accurateCast('truex', 'Bool'); -- { serverError CANNOT_PARSE_BOOL } +select accurateCast('xfalse', 'Bool'); -- { serverError CANNOT_PARSE_BOOL } +select accurateCast('true', 'Bool'); +select accurateCast('false', 'Bool'); +select accurateCast('1', 'Bool'); +select accurateCast('0', 'Bool'); +select accurateCast(1, 'Bool'); +select accurateCast(0, 'Bool'); + +select accurateCast('test', 'Nullable(IPv4)'); +select accurateCast('test', 'IPv4'); -- { serverError CANNOT_PARSE_IPV4 } +select accurateCast('2001:db8::1', 'IPv4'); -- { serverError CANNOT_PARSE_IPV4 } +select accurateCast('::ffff:192.0.2.1', 'IPv4'); -- { serverError CANNOT_PARSE_IPV4 } +select accurateCast('192.0.2.1', 'IPv4'); +select accurateCast('192.0.2.1x', 'IPv4'); -- { serverError CANNOT_PARSE_IPV4 } + +select accurateCast('test', 'Nullable(IPv6)'); +select accurateCast('test', 'IPv6'); -- { serverError CANNOT_PARSE_IPV6 } +select accurateCast('192.0.2.1', 'IPv6'); +select accurateCast('2001:db8::1', 'IPv6'); +select accurateCast('2001:db8::1x', 'IPv6'); -- { serverError CANNOT_PARSE_IPV6 } diff --git a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql index 7d1fc60e90d..430f3a86dc1 100644 --- a/tests/queries/0_stateless/01676_range_hashed_dictionary.sql +++ b/tests/queries/0_stateless/01676_range_hashed_dictionary.sql @@ -29,7 +29,9 @@ PRIMARY KEY CountryID SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_range_dict')) LIFETIME(MIN 1 MAX 1000) LAYOUT(RANGE_HASHED()) -RANGE(MIN StartDate MAX EndDate); +RANGE(MIN StartDate MAX EndDate) +SETTINGS(dictionary_use_async_executor=1, max_threads=8) +; SELECT 'Dictionary not nullable'; SELECT 'dictGet'; diff --git a/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql b/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql index c6133e7a5ee..9ba8a6de796 100644 --- a/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql +++ b/tests/queries/0_stateless/01681_cache_dictionary_simple_key.sql @@ -24,7 +24,9 @@ CREATE DICTIONARY 01681_database_for_cache_dictionary.cache_dictionary_simple_ke PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_simple_attributes_source_table')) LIFETIME(MIN 1 MAX 1000) -LAYOUT(CACHE(SIZE_IN_CELLS 10)); +LAYOUT(CACHE(SIZE_IN_CELLS 10)) +SETTINGS(dictionary_use_async_executor=1, max_threads=8) +; SELECT 'Dictionary cache_dictionary_simple_key_simple_attributes'; SELECT 'dictGet existing value'; diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference 
b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference index ecea0a9f69f..c07ef5ca631 100644 --- a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference +++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference @@ -1,4 +1,4 @@ -1940-10-09 21:13:17.6 +1940-10-09 21:13:16.4 2284-06-04 23:46:43.6 2299-12-31 23:40:00.1 1900-01-01 00:00:00.9 diff --git a/tests/queries/0_stateless/01710_projection_part_check.sql b/tests/queries/0_stateless/01710_projection_part_check.sql index b15d9d7525e..8f496ddbeb5 100644 --- a/tests/queries/0_stateless/01710_projection_part_check.sql +++ b/tests/queries/0_stateless/01710_projection_part_check.sql @@ -5,7 +5,7 @@ create table tp (x Int32, y Int32, projection p (select x, y order by x)) engine insert into tp select number, number from numbers(3); insert into tp select number, number from numbers(5); -check table tp settings check_query_single_value_result=0; +check table tp settings check_query_single_value_result=0, max_threads=1; drop table tp; @@ -13,7 +13,7 @@ create table tp (p Date, k UInt64, v1 UInt64, v2 Int64, projection p1 ( select p insert into tp (p, k, v1, v2) values ('2018-05-15', 1, 1000, 2000), ('2018-05-16', 2, 3000, 4000), ('2018-05-17', 3, 5000, 6000), ('2018-05-18', 4, 7000, 8000); -check table tp settings check_query_single_value_result=0; +check table tp settings check_query_single_value_result=0, max_threads=1; drop table tp; @@ -22,5 +22,5 @@ create table tp (x int, projection p (select sum(x))) engine = MergeTree order b insert into tp values (1), (2), (3), (4); select part_type from system.parts where database = currentDatabase() and table = 'tp'; select part_type from system.projection_parts where database = currentDatabase() and table = 'tp'; -check table tp settings check_query_single_value_result=0; +check table tp settings check_query_single_value_result=0, max_threads=1; drop table tp; diff --git a/tests/queries/0_stateless/01760_polygon_dictionaries.sql b/tests/queries/0_stateless/01760_polygon_dictionaries.sql index 1589a091ee7..e74b3ce03b9 100644 --- a/tests/queries/0_stateless/01760_polygon_dictionaries.sql +++ b/tests/queries/0_stateless/01760_polygon_dictionaries.sql @@ -29,7 +29,9 @@ CREATE DICTIONARY 01760_db.dict_array PRIMARY KEY key SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons' DB '01760_db')) LIFETIME(0) -LAYOUT(POLYGON()); +LAYOUT(POLYGON()) +SETTINGS(dictionary_use_async_executor=1, max_threads=8) +; SELECT 'dictGet'; diff --git a/tests/queries/0_stateless/01765_hashed_dictionary_simple_key.sql b/tests/queries/0_stateless/01765_hashed_dictionary_simple_key.sql index db3431b1572..0b12b2fc8c9 100644 --- a/tests/queries/0_stateless/01765_hashed_dictionary_simple_key.sql +++ b/tests/queries/0_stateless/01765_hashed_dictionary_simple_key.sql @@ -24,7 +24,8 @@ CREATE DICTIONARY 01765_db.hashed_dictionary_simple_key_simple_attributes PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_simple_attributes_source_table')) LIFETIME(MIN 1 MAX 1000) -LAYOUT(HASHED()); +LAYOUT(HASHED()) +SETTINGS(dictionary_use_async_executor=1, max_threads=8); SELECT 'Dictionary hashed_dictionary_simple_key_simple_attributes'; SELECT 'dictGet existing value'; diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index 219fdf04472..6c4220855c8 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ 
b/tests/queries/0_stateless/02010_lc_native.python @@ -1,33 +1,227 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import socket import os -import sys +import uuid -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CLIENT_NAME = "simple native protocol" -from tcp_client import ( - TCPClient, - CLICKHOUSE_DATABASE, - writeVarUInt, - writeStringBinary, - serializeBlockInfo, - assertPacket, -) + +def writeVarUInt(x, ba): + for _ in range(0, 9): + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, "utf-8") + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size=1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + + +def readUInt8(s): + return readUInt(s) + + +def readUInt16(s): + return readUInt(s, 2) + + +def readUInt32(s): + return readUInt(s, 4) + + +def readUInt64(s): + return readUInt(s, 8) + + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode("utf-8") + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary(CLIENT_NAME, ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("default", ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert p_type == 0 # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + + +def serializeClientInfo(ba, query_id): + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary(CLIENT_NAME, ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + 
serializeClientInfo(ba, query_id) + + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def assertPacket(packet, expected): + assert packet == expected, packet + + +def readHeader(s): + packet_type = readVarUInt(s) + if packet_type == 2: # Exception + raise RuntimeError(readException(s)) + assertPacket(packet_type, 1) # Data + + readStringBinary(s) # external table name + # BlockInfo + assertPacket(readVarUInt(s), 1) # 1 + assertPacket(readUInt8(s), 0) # is_overflows + assertPacket(readVarUInt(s), 2) # 2 + assertPacket(readUInt32(s), 4294967295) # bucket_num + assertPacket(readVarUInt(s), 0) # 0 + columns = readVarUInt(s) # rows + rows = readVarUInt(s) # columns + print("Rows {} Columns {}".format(rows, columns)) + for _ in range(columns): + col_name = readStringBinary(s) + type_name = readStringBinary(s) + print("Column {} type {}".format(col_name, type_name)) + + +def readException(s): + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def insertValidLowCardinalityRow(): - with TCPClient() as client: - client.sendQuery( + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery( + s, "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - client.sendEmptyBlock() - client.readHeader() + sendEmptyBlock(s) + readHeader(s) # Data ba = bytearray() @@ -46,25 +240,31 @@ def insertValidLowCardinalityRow(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (0 for 'hello') - client.send(ba) + s.sendall(ba) # Fin block - client.sendEmptyBlock() + sendEmptyBlock(s) - assertPacket(client.readVarUInt(), 5) # End of stream + assertPacket(readVarUInt(s), 5) # End of stream + s.close() def insertLowCardinalityRowWithIndexOverflow(): - with TCPClient() as client: - client.sendQuery( + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery( + s, "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - client.sendEmptyBlock() - client.readHeader() + sendEmptyBlock(s) + readHeader(s) # Data ba = bytearray() @@ -83,23 +283,29 @@ def insertLowCardinalityRowWithIndexOverflow(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 7 + [1]) # UInt64 index (overflow) - client.send(ba) + s.sendall(ba) - assertPacket(client.readVarUInt(), 2) # Exception - print(client.readException()) + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() def 
insertLowCardinalityRowWithIncorrectDictType(): - with TCPClient() as client: - client.sendQuery( + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery( + s, "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - client.sendEmptyBlock() - client.readHeader() + sendEmptyBlock(s) + readHeader(s) # Data ba = bytearray() @@ -118,23 +324,29 @@ def insertLowCardinalityRowWithIncorrectDictType(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (overflow) - client.send(ba) + s.sendall(ba) - assertPacket(client.readVarUInt(), 2) # Exception - print(client.readException()) + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() def insertLowCardinalityRowWithIncorrectAdditionalKeys(): - with TCPClient() as client: - client.sendQuery( + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery( + s, "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( CLICKHOUSE_DATABASE ), ) # external tables - client.sendEmptyBlock() - client.readHeader() + sendEmptyBlock(s) + readHeader(s) # Data ba = bytearray() @@ -153,10 +365,11 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): writeStringBinary("hello", ba) # key ba.extend([1] + [0] * 7) # num_indexes ba.extend([0] * 8) # UInt64 index (0 for 'hello') - client.send(ba) + s.sendall(ba) - assertPacket(client.readVarUInt(), 2) # Exception - print(client.readException()) + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() def main(): diff --git a/tests/queries/0_stateless/02026_accurate_cast_or_default.reference b/tests/queries/0_stateless/02026_accurate_cast_or_default.reference index 67be2da9975..c31e00dd8f4 100644 --- a/tests/queries/0_stateless/02026_accurate_cast_or_default.reference +++ b/tests/queries/0_stateless/02026_accurate_cast_or_default.reference @@ -30,3 +30,24 @@ 0 5 127 127 0 5 +\N +false +false +false +true +false +true +false +true +false +\N +0.0.0.0 +0.0.0.0 +0.0.0.0 +192.0.2.1 +0.0.0.0 +\N +:: +::ffff:192.0.2.1 +2001:db8::1 +:: diff --git a/tests/queries/0_stateless/02026_accurate_cast_or_default.sql b/tests/queries/0_stateless/02026_accurate_cast_or_default.sql index 1c35055749e..d493914c956 100644 --- a/tests/queries/0_stateless/02026_accurate_cast_or_default.sql +++ b/tests/queries/0_stateless/02026_accurate_cast_or_default.sql @@ -34,3 +34,27 @@ SELECT accurateCastOrDefault(nan, 'UInt64'), accurateCastOrDefault(nan, 'UInt64' SELECT accurateCastOrDefault(nan, 'UInt256'), accurateCastOrDefault(nan, 'UInt256', toUInt256(5)); SELECT accurateCastOrDefault(number + 127, 'Int8') AS x, accurateCastOrDefault(number + 127, 'Int8', toInt8(5)) AS x_with_default FROM numbers (2) ORDER BY number; + +select accurateCastOrDefault('test', 'Nullable(Bool)'); +select accurateCastOrDefault('test', 'Bool'); +select accurateCastOrDefault('truex', 'Bool'); +select accurateCastOrDefault('xfalse', 'Bool'); +select accurateCastOrDefault('true', 'Bool'); +select accurateCastOrDefault('false', 'Bool'); +select accurateCastOrDefault('1', 'Bool'); +select accurateCastOrDefault('0', 'Bool'); +select accurateCastOrDefault(1, 'Bool'); +select accurateCastOrDefault(0, 'Bool'); + +select accurateCastOrDefault('test', 
'Nullable(IPv4)'); +select accurateCastOrDefault('test', 'IPv4'); +select accurateCastOrDefault('2001:db8::1', 'IPv4'); +select accurateCastOrDefault('::ffff:192.0.2.1', 'IPv4'); +select accurateCastOrDefault('192.0.2.1', 'IPv4'); +select accurateCastOrDefault('192.0.2.1x', 'IPv4'); + +select accurateCastOrDefault('test', 'Nullable(IPv6)'); +select accurateCastOrDefault('test', 'IPv6'); +select accurateCastOrDefault('192.0.2.1', 'IPv6'); +select accurateCastOrDefault('2001:db8::1', 'IPv6'); +select accurateCastOrDefault('2001:db8::1x', 'IPv6'); diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql index 8d792836562..7d952223705 100644 --- a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql @@ -21,7 +21,8 @@ CREATE DICTIONARY hashed_array_dictionary_simple_key_simple_attributes PRIMARY KEY id SOURCE(CLICKHOUSE(TABLE 'simple_key_simple_attributes_source_table')) LAYOUT(HASHED_ARRAY()) -LIFETIME(MIN 1 MAX 1000); +LIFETIME(MIN 1 MAX 1000) +SETTINGS(dictionary_use_async_executor=1, max_threads=8); SELECT 'Dictionary hashed_array_dictionary_simple_key_simple_attributes'; SELECT 'dictGet existing value'; diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index f480236111f..41543d0706a 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -38,5 +38,4 @@ LazyOutputFormat 1 1 1 0 0 LimitsCheckingTransform 1 1 1 1 1 NullSource 1 0 0 0 0 NullSource 1 0 0 0 0 -NullSource 0 0 0 0 0 SourceFromSingleChunk 1 0 0 1 1 diff --git a/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql b/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql index 0ac97404c46..625be63e0c0 100644 --- a/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql +++ b/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql @@ -12,7 +12,6 @@ SELECT name, column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() AND table = 't_sparse_02235' ORDER BY name, column; -SET check_query_single_value_result = 0; -CHECK TABLE t_sparse_02235; +CHECK TABLE t_sparse_02235 SETTINGS check_query_single_value_result = 0, max_threads = 1; DROP TABLE t_sparse_02235; diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference index 159ee805f26..717484d4670 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference @@ -17,7 +17,7 @@ true \N 0.0.0.0 \N -0.0.0.0 +\N \N \N \N @@ -25,8 +25,21 @@ true \N :: \N -:: +\N \N \N \N \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 +fuzzer issue +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql index 1cc5140f339..b56ebc2b09d 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql @@ -22,3 +22,9 @@ select toIPv6(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) fro select toIPv6OrDefault(number % 2 ? 
'' : NULL) from numbers(2); select toIPv6OrNull(number % 2 ? '' : NULL) from numbers(2); select IPv6StringToNum(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) from numbers(2); + +select 'fuzzer issue'; +SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2); +SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2); +SELECT accurateCastOrNull(if(number % 2, NULL, 'truex'), 'Bool') FROM numbers(4); +SELECT accurateCastOrNull(if(number % 2, 'truex', NULL), 'Bool') FROM numbers(4); diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 7f3a881e090..273c7c08907 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 2 0 +102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 2 0 1 diff --git a/tests/queries/0_stateless/02373_datetime64_monotonicity.queries b/tests/queries/0_stateless/02373_datetime64_monotonicity.queries index 212198c89de..404e3391205 100644 --- a/tests/queries/0_stateless/02373_datetime64_monotonicity.queries +++ b/tests/queries/0_stateless/02373_datetime64_monotonicity.queries @@ -46,11 +46,11 @@ SELECT count() FROM dt64_monot_test WHERE toDateTime64(date_time,0) >= '2020-01- SELECT count() FROM dt64_monot_test WHERE toDateTime64(date_time,0) >= '2020-01-01 00:00:01.1' settings force_index_by_date = 1, force_primary_key = 1; create table dt64_monot_test_string(date_time String, x String) Engine=MergeTree order by date_time; -insert into dt64_monot_test_string select '2020-01-01 00:00', '' from numbers(1); -insert into dt64_monot_test_string select '2020-01-01 00:00:00.000000' , '' from numbers(10); +insert into dt64_monot_test_string select '2020-01-01 00:00:00.000000001', '' from numbers(1); +insert into dt64_monot_test_string select '2020-01-01 00:00:00.000', '' from numbers(10); -SELECT count() FROM dt64_monot_test_string WHERE toDateTime64(date_time,3) = '1970-01-01 00:00:00.000000000'; -SELECT count() FROM dt64_monot_test_string WHERE toDateTime64(date_time,3) = '1970-01-01 00:00:00.000000001'; +SELECT count() FROM dt64_monot_test_string WHERE toDateTime64(date_time,9) = '2020-01-01 00:00:00.000000000'; +SELECT count() FROM dt64_monot_test_string WHERE toDateTime64(date_time,3) = '2020-01-01 00:00:00.000000001'; SELECT count() FROM dt64_monot_test_string WHERE toDateTime64(date_time,9) = '2020-01-01 00:00:00'; drop table dt64_monot_test; diff --git a/tests/queries/0_stateless/02373_datetime64_monotonicity.reference b/tests/queries/0_stateless/02373_datetime64_monotonicity.reference index d9c310bdbc9..935ee685cc9 100644 --- a/tests/queries/0_stateless/02373_datetime64_monotonicity.reference +++ b/tests/queries/0_stateless/02373_datetime64_monotonicity.reference @@ -17,8 +17,8 @@ Asia/Tehran 10 0 0 -0 -0 +10 +11 10 UTC @@ -40,8 +40,8 @@ UTC 10 10 10 -1 -1 +10 +11 10 Canada/Atlantic @@ -63,8 +63,8 @@ Canada/Atlantic 10 10 10 -0 -0 +10 +11 10 Europe/Berlin @@ -86,7 +86,7 @@ Europe/Berlin 10 10 9 -0 -0 +10 +11 10 diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 4d2f01db55b..3213b8cd387 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ 
b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -14,7 +14,7 @@ def main(): sock = socket(AF_INET, SOCK_STREAM) sock.connect((host, port)) sock.settimeout(60) - s = "POST /play HTTP/1.1\r\n" + s = "POST / HTTP/1.1\r\n" s += "Host: %s\r\n" % host s += "Content-type: multipart/form-data\r\n" s += "Transfer-encoding: chunked\r\n" diff --git a/tests/queries/0_stateless/02438_sync_replica_lightweight.reference b/tests/queries/0_stateless/02438_sync_replica_lightweight.reference index 25abaad13e2..00d7f58bdff 100644 --- a/tests/queries/0_stateless/02438_sync_replica_lightweight.reference +++ b/tests/queries/0_stateless/02438_sync_replica_lightweight.reference @@ -5,8 +5,9 @@ GET_PART all_1_1_0 MERGE_PARTS all_0_1_1 3 1 all_0_1_1 3 2 all_0_1_1 -4 1 all_0_1_1 -4 2 all_0_1_1 +4 1 +4 2 +MERGE_PARTS all_0_1_1 5 1 all_0_2_2 5 2 all_0_2_2 5 3 all_0_2_2 diff --git a/tests/queries/0_stateless/02438_sync_replica_lightweight.sql b/tests/queries/0_stateless/02438_sync_replica_lightweight.sql index 1da48d95d9b..beddc771141 100644 --- a/tests/queries/0_stateless/02438_sync_replica_lightweight.sql +++ b/tests/queries/0_stateless/02438_sync_replica_lightweight.sql @@ -24,7 +24,8 @@ system start replicated sends rmt1; system sync replica rmt2 lightweight; -- waits for fetches, not merges select type, new_part_name from system.replication_queue where database=currentDatabase() and table='rmt2' order by new_part_name; select 3, n, _part from rmt1 order by n; -select 4, n, _part from rmt2 order by n; +select 4, n from rmt2 order by n; +select type, new_part_name from system.replication_queue where database=currentDatabase() and table='rmt2' order by new_part_name; system start merges rmt2; system sync replica rmt2; diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index fdc64a8dba8..92240e109c1 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -1,30 +1,188 @@ #!/usr/bin/env python3 -import json +import socket import os -import sys +import uuid +import json -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CLIENT_NAME = "simple native protocol" -from tcp_client import TCPClient + +def writeVarUInt(x, ba): + for _ in range(0, 9): + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, "utf-8") + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size=1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + + +def readUInt8(s): + return readUInt(s) + + +def readUInt16(s): + return readUInt(s, 2) + + +def readUInt32(s): + return readUInt(s, 4) + + +def readUInt64(s): + return readUInt(s, 8) + + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = 
readStrict(s, size) + return s.decode("utf-8") + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary(CLIENT_NAME, ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert p_type == 0 # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + + +def serializeClientInfo(ba, query_id): + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary(CLIENT_NAME, ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def assertPacket(packet, expected): + assert packet == expected, packet class Progress: - def __init__( - self, - read_rows=0, - read_bytes=0, - total_rows_to_read=0, - written_rows=0, - written_bytes=0, - ): + def __init__(self): # NOTE: this is done in ctor to initialize __dict__ - self.read_rows = read_rows - self.read_bytes = read_bytes - self.total_rows_to_read = total_rows_to_read - self.written_rows = written_rows - self.written_bytes = written_bytes + self.read_rows = 0 + self.read_bytes = 0 + self.total_rows_to_read = 0 + self.written_rows = 0 + self.written_bytes = 0 def __str__(self): return json.dumps(self.__dict__) @@ -37,6 +195,13 @@ class Progress: self.written_bytes += b.written_bytes return self + def readPacket(self, s): + self.read_rows += readVarUInt(s) + self.read_bytes += readVarUInt(s) + self.total_rows_to_read += readVarUInt(s) + self.written_rows += readVarUInt(s) + self.written_bytes += readVarUInt(s) + def __bool__(self): return ( self.read_rows > 0 
@@ -47,25 +212,52 @@ class Progress: ) +def readProgress(s): + packet_type = readVarUInt(s) + if packet_type == 2: # Exception + raise RuntimeError(readException(s)) + + if packet_type == 5: # End stream + return None + + assertPacket(packet_type, 3) # Progress + + progress = Progress() + progress.readPacket(s) + return progress + + +def readException(s): + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) + + def main(): - with TCPClient() as client: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) # For 1 second sleep and 1000ms of interactive_delay we definitelly should have non zero progress packet. # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. - client.sendQuery( + sendQuery( + s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000", ) # external tables - client.sendEmptyBlock() + sendEmptyBlock(s) summary_progress = Progress() non_empty_progress_packets = 0 while True: - progress_info = client.readProgress() - if progress_info is None: + progress = readProgress(s) + if progress is None: break - - progress = Progress(*progress_info) summary_progress += progress if progress: non_empty_progress_packets += 1 @@ -76,6 +268,8 @@ def main(): # - 1 or 2 for each SELECT block assert non_empty_progress_packets in (3, 4), f"{non_empty_progress_packets=:}" + s.close() + if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02497_source_part_is_intact_when_mutation.sql b/tests/queries/0_stateless/02497_source_part_is_intact_when_mutation.sql index 39231c12881..960c6c168a1 100644 --- a/tests/queries/0_stateless/02497_source_part_is_intact_when_mutation.sql +++ b/tests/queries/0_stateless/02497_source_part_is_intact_when_mutation.sql @@ -14,7 +14,7 @@ INSERT INTO t_source_part_is_intact SELECT if (number % 11 = 0, number, 0) FROM numbers(2000); -CHECK TABLE t_source_part_is_intact; +CHECK TABLE t_source_part_is_intact SETTINGS max_threads = 1; SELECT 1, count() FROM t_source_part_is_intact; BEGIN TRANSACTION; @@ -22,18 +22,18 @@ BEGIN TRANSACTION; ALTER TABLE t_source_part_is_intact update u = 0 where u != 0; ROLLBACK; -CHECK TABLE t_source_part_is_intact; +CHECK TABLE t_source_part_is_intact SETTINGS max_threads = 1; BEGIN TRANSACTION; -- size of the file serialization.json is different in the new part ALTER TABLE t_source_part_is_intact update u = 1 WHERE 1; ROLLBACK; -CHECK TABLE t_source_part_is_intact; +CHECK TABLE t_source_part_is_intact SETTINGS max_threads = 1; DETACH TABLE t_source_part_is_intact; ATTACH TABLE t_source_part_is_intact; -CHECK TABLE t_source_part_is_intact; +CHECK TABLE t_source_part_is_intact SETTINGS max_threads = 1; DROP TABLE t_source_part_is_intact; diff --git a/tests/queries/0_stateless/02550_client_connections_credentials.sh b/tests/queries/0_stateless/02550_client_connections_credentials.sh index 3776216751e..cc023cad930 100755 --- a/tests/queries/0_stateless/02550_client_connections_credentials.sh +++ b/tests/queries/0_stateless/02550_client_connections_credentials.sh @@ -81,7 +81,13 @@ echo 'port' $CLICKHOUSE_CLIENT --config $CONFIG --connection test_port -q 'select tcpPort()' |& grep -F -o 'Connection refused 
(localhost:0).' $CLICKHOUSE_CLIENT --config $CONFIG --connection test_port --port $TEST_PORT -q 'select tcpPort()' echo 'secure' -$CLICKHOUSE_CLIENT --config $CONFIG --connection test_secure -q 'select tcpPort()' |& grep -c -F -o -e OPENSSL_internal:WRONG_VERSION_NUMBER -e 'tcp_secure protocol is disabled because poco library was built without NetSSL support.' + +if [ "`uname -m`" == 's390x' ]; then + $CLICKHOUSE_CLIENT --config $CONFIG --connection test_secure -q 'select tcpPort()' |& grep -c -F -o -e 'SSL routines::wrong version number' -e 'tcp_secure protocol is disabled because poco library was built without NetSSL support.' +else + $CLICKHOUSE_CLIENT --config $CONFIG --connection test_secure -q 'select tcpPort()' |& grep -c -F -o -e OPENSSL_internal:WRONG_VERSION_NUMBER -e 'tcp_secure protocol is disabled because poco library was built without NetSSL support.' +fi + echo 'database' $CLICKHOUSE_CLIENT --config $CONFIG --connection test_database -q 'select currentDatabase()' echo 'user' diff --git a/tests/queries/0_stateless/02894_MergeSortingPartialResultTransform_empty_block.reference b/tests/queries/0_stateless/02597_column_delete_and_replication.reference similarity index 100% rename from tests/queries/0_stateless/02894_MergeSortingPartialResultTransform_empty_block.reference rename to tests/queries/0_stateless/02597_column_delete_and_replication.reference diff --git a/tests/queries/0_stateless/02597_column_delete_and_replication.sql b/tests/queries/0_stateless/02597_column_delete_and_replication.sql new file mode 100644 index 00000000000..b0257f666d9 --- /dev/null +++ b/tests/queries/0_stateless/02597_column_delete_and_replication.sql @@ -0,0 +1,27 @@ +CREATE TABLE test ( + `c_id` String, + `p_id` String, + `d` String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/test_table', '1') +ORDER BY (c_id, p_id); + +INSERT INTO test SELECT '1', '11', '111' FROM numbers(3); + +INSERT INTO test SELECT '2', '22', '22' FROM numbers(3); + +set mutations_sync=0; + +ALTER TABLE test UPDATE d = d || toString(sleepEachRow(0.3)) where 1; + +ALTER TABLE test ADD COLUMN x UInt32 default 0; +ALTER TABLE test UPDATE d = d || '1' where x = 42; +ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; --{serverError 36} + +ALTER TABLE test UPDATE x = x + 1 where 1 SETTINGS mutations_sync = 2; + +ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; + +select * from test format Null; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02597_column_update_and_replication.reference b/tests/queries/0_stateless/02597_column_update_and_replication.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02597_column_update_and_replication.sql b/tests/queries/0_stateless/02597_column_update_and_replication.sql new file mode 100644 index 00000000000..42fe813f8a1 --- /dev/null +++ b/tests/queries/0_stateless/02597_column_update_and_replication.sql @@ -0,0 +1,27 @@ +CREATE TABLE test ( + `c_id` String, + `p_id` String, + `d` String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/test_table', '1') +ORDER BY (c_id, p_id); + +INSERT INTO test SELECT '1', '11', '111' FROM numbers(3); + +INSERT INTO test SELECT '2', '22', '22' FROM numbers(3); + +set mutations_sync=0; + +ALTER TABLE test UPDATE d = d || toString(sleepEachRow(0.3)) where 1; + +ALTER TABLE test ADD COLUMN x UInt32 default 0; +ALTER TABLE test UPDATE x = x + 1 where 1; +ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; --{serverError 36} + 
+ALTER TABLE test UPDATE x = x + 1 where 1 SETTINGS mutations_sync = 2; + +ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; + +select * from test format Null; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.reference b/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.sql b/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.sql new file mode 100644 index 00000000000..b07b3b54514 --- /dev/null +++ b/tests/queries/0_stateless/02597_column_update_tricy_expression_and_replication.sql @@ -0,0 +1,28 @@ +CREATE TABLE test ( + `c_id` String, + `p_id` String, + `d` UInt32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/test_table', '1') +ORDER BY (c_id, p_id); + +INSERT INTO test SELECT '1', '11', '111' FROM numbers(3); + +INSERT INTO test SELECT '2', '22', '22' FROM numbers(3); + +set mutations_sync=0; + +ALTER TABLE test UPDATE d = d + sleepEachRow(0.3) where 1; + +ALTER TABLE test ADD COLUMN x UInt32 default 0; +ALTER TABLE test UPDATE d = x + 1 where 1; +ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; --{serverError 36} + +ALTER TABLE test UPDATE x = x + 1 where 1 SETTINGS mutations_sync = 2; + +ALTER TABLE test DROP COLUMN x SETTINGS mutations_sync = 2; + +select * from test format Null; + +DROP TABLE test; + diff --git a/tests/queries/0_stateless/02597_projection_materialize_and_replication.reference b/tests/queries/0_stateless/02597_projection_materialize_and_replication.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02597_projection_materialize_and_replication.sql b/tests/queries/0_stateless/02597_projection_materialize_and_replication.sql new file mode 100644 index 00000000000..031cb3cb6fb --- /dev/null +++ b/tests/queries/0_stateless/02597_projection_materialize_and_replication.sql @@ -0,0 +1,28 @@ +CREATE TABLE test ( + `c_id` String, + `p_id` String, + `d` String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/test_table', '1') +ORDER BY (c_id, p_id); + +INSERT INTO test SELECT '1', '11', '111' FROM numbers(3); + +INSERT INTO test SELECT '2', '22', '22' FROM numbers(3); + +set mutations_sync=0; + +ALTER TABLE test UPDATE d = d || toString(sleepEachRow(0.3)) where 1; + +ALTER TABLE test ADD PROJECTION d_order ( SELECT min(c_id) GROUP BY `d`); +ALTER TABLE test MATERIALIZE PROJECTION d_order; +ALTER TABLE test DROP PROJECTION d_order SETTINGS mutations_sync = 2; --{serverError 36} + +-- just to wait prev mutation +ALTER TABLE test DELETE where d = 'Hello' SETTINGS mutations_sync = 2; + +ALTER TABLE test DROP PROJECTION d_order SETTINGS mutations_sync = 2; + +select * from test format Null; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02718_array_fold.sql b/tests/queries/0_stateless/02718_array_fold.sql index 7f20602a371..0486a5ce2e3 100644 --- a/tests/queries/0_stateless/02718_array_fold.sql +++ b/tests/queries/0_stateless/02718_array_fold.sql @@ -1,23 +1,24 @@ SELECT 'Negative tests'; SELECT arrayFold(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT arrayFold(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT arrayFold(1, toUInt64(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( x,acc -> x, emptyArrayString(), toInt8(0)); -- { serverError 
TYPE_MISMATCH } -SELECT arrayFold( x,acc -> x, 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( x,y,acc -> x, [0, 1], 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( x,acc -> x, [0, 1], [2, 3], toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( x,y,acc -> x, [0, 1], [2, 3, 4], toUInt8(0)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } +SELECT arrayFold(1, toUInt64(0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT arrayFold(1, emptyArrayUInt64(), toUInt64(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayFold( acc,x -> x, emptyArrayString(), toInt8(0)); -- { serverError TYPE_MISMATCH } +SELECT arrayFold( acc,x -> x, 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayFold( acc,x,y -> x, [0, 1], 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayFold( acc,x -> x, [0, 1], [2, 3], toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayFold( acc,x,y -> x, [0, 1], [2, 3, 4], toUInt8(0)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } SELECT 'Const arrays'; -SELECT arrayFold( x,acc -> acc+x*2, [1, 2, 3, 4], toInt64(3)); -SELECT arrayFold( x,acc -> acc+x*2, emptyArrayInt64(), toInt64(3)); -SELECT arrayFold( x,y,acc -> acc+x*2+y*3, [1, 2, 3, 4], [5, 6, 7, 8], toInt64(3)); -SELECT arrayFold( x,acc -> arrayPushBack(acc, x), [1, 2, 3, 4], emptyArrayInt64()); -SELECT arrayFold( x,acc -> arrayPushFront(acc, x), [1, 2, 3, 4], emptyArrayInt64()); -SELECT arrayFold( x,acc -> (arrayPushFront(acc.1, x),arrayPushBack(acc.2, x)), [1, 2, 3, 4], (emptyArrayInt64(), emptyArrayInt64())); -SELECT arrayFold( x,acc -> x%2 ? (arrayPushBack(acc.1, x), acc.2): (acc.1, arrayPushBack(acc.2, x)), [1, 2, 3, 4, 5, 6], (emptyArrayInt64(), emptyArrayInt64())); +SELECT arrayFold( acc,x -> acc+x*2, [1, 2, 3, 4], toInt64(3)); +SELECT arrayFold( acc,x -> acc+x*2, emptyArrayInt64(), toInt64(3)); +SELECT arrayFold( acc,x,y -> acc+x*2+y*3, [1, 2, 3, 4], [5, 6, 7, 8], toInt64(3)); +SELECT arrayFold( acc,x -> arrayPushBack(acc, x), [1, 2, 3, 4], emptyArrayInt64()); +SELECT arrayFold( acc,x -> arrayPushFront(acc, x), [1, 2, 3, 4], emptyArrayInt64()); +SELECT arrayFold( acc,x -> (arrayPushFront(acc.1, x),arrayPushBack(acc.2, x)), [1, 2, 3, 4], (emptyArrayInt64(), emptyArrayInt64())); +SELECT arrayFold( acc,x -> x%2 ? (arrayPushBack(acc.1, x), acc.2): (acc.1, arrayPushBack(acc.2, x)), [1, 2, 3, 4, 5, 6], (emptyArrayInt64(), emptyArrayInt64())); SELECT 'Non-const arrays'; -SELECT arrayFold( x,acc -> acc+x, range(number), number) FROM system.numbers LIMIT 5; -SELECT arrayFold( x,acc -> arrayPushFront(acc,x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5; -SELECT arrayFold( x,acc -> x%2 ? arrayPushFront(acc,x) : arrayPushBack(acc,x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5; +SELECT arrayFold( acc,x -> acc+x, range(number), number) FROM system.numbers LIMIT 5; +SELECT arrayFold( acc,x -> arrayPushFront(acc,x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5; +SELECT arrayFold( acc,x -> x%2 ? 
arrayPushFront(acc,x) : arrayPushBack(acc,x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5; diff --git a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python index 1736807410f..48b27d434ec 100644 --- a/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python +++ b/tests/queries/0_stateless/02750_settings_alias_tcp_protocol.python @@ -1,23 +1,217 @@ #!/usr/bin/env python3 - +import socket import os -import sys +import uuid +import json -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") +CLIENT_NAME = "simple native protocol" -from tcp_client import TCPClient + +def writeVarUInt(x, ba): + for _ in range(0, 9): + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + + +def writeStringBinary(s, ba): + b = bytes(s, "utf-8") + writeVarUInt(len(s), ba) + ba.extend(b) + + +def readStrict(s, size=1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + + +def readUInt8(s): + return readUInt(s) + + +def readUInt16(s): + return readUInt(s, 2) + + +def readUInt32(s): + return readUInt(s, 4) + + +def readUInt64(s): + return readUInt(s, 8) + + +def readVarUInt(s): + x = 0 + for i in range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode("utf-8") + + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary(CLIENT_NAME, ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert p_type == 0 # Hello + _server_name = readStringBinary(s) + _server_version_major = readVarUInt(s) + _server_version_minor = readVarUInt(s) + _server_revision = readVarUInt(s) + _server_timezone = readStringBinary(s) + _server_display_name = readStringBinary(s) + _server_version_patch = readVarUInt(s) + + +def serializeClientInfo(ba, query_id): + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary(CLIENT_NAME, ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + + +def sendQuery(s, query, settings): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + 
serializeClientInfo(ba, query_id) + + # Settings + for key, value in settings.items(): + writeStringBinary(key, ba) + writeVarUInt(1, ba) # is_important + writeStringBinary(str(value), ba) + writeStringBinary("", ba) # End of settings + + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + + +def assertPacket(packet, expected): + assert packet == expected, "Got: {}, expected: {}".format(packet, expected) + + +def readResponse(s): + packet_type = readVarUInt(s) + if packet_type == 2: # Exception + raise RuntimeError(readException(s)) + + if packet_type == 1: # Data + return None + if packet_type == 3: # Progress + return None + if packet_type == 5: # End stream + return None + + raise RuntimeError("Unexpected packet: {}".format(packet_type)) + + +def readException(s): + code = readUInt32(s) + _name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def main(): - with TCPClient() as client: - client.sendQuery("select 1", {"replication_alter_partitions_sync": 1}) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "select 1", {"replication_alter_partitions_sync": 1}) # external tables - client.sendEmptyBlock() + sendEmptyBlock(s) - while client.readResponse() is not None: + while readResponse(s) is not None: pass + + s.close() print("OK") diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python deleted file mode 100755 index 61ba0e14605..00000000000 --- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.python +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from tcp_client import TCPClient - - -def run_query_without_errors(query, support_partial_result): - with TCPClient() as client: - client.sendQuery(query, settings={"allow_experimental_partial_result": True}) - - # external tables - client.sendEmptyBlock() - client.readHeader() - - # Partial result - partial_result = client.readDataWithoutProgress()[0] - if support_partial_result: - assert ( - len(partial_result.value) > 0 - ), "Expected at least one block with a non-empty partial result before getting the full result" - - while True: - assert all( - a >= b - for a, b in zip(partial_result.value, partial_result.value[1:]) - ), "Partial result always should be sorted for this test" - - new_partial_result = client.readDataWithoutProgress( - need_print_info=False - )[0] - if len(new_partial_result.value) == 0: - break - - data_size = len(partial_result.value) - assert all( - partial_result.value[i] <= new_partial_result.value[i] - for i in 
range(data_size) - ), f"New partial result values should always be greater then old one because a new block contains more information about the full data. New result {new_partial_result}. Previous result {partial_result}" - - partial_result = new_partial_result - else: - block_rows = len(partial_result.value) - assert ( - block_rows == 0 - ), f"Expected only empty partial result block before getting the full result, but block has {block_rows} rows" - - # Full result - full_result = client.readDataWithoutProgress()[0] - - data_size = len(partial_result.value) - assert all( - partial_result.value[i] <= full_result.value[i] for i in range(data_size) - ), f"Full result values should always be greater then partial result values. Full result {full_result}. Partial result {partial_result}" - - for result in full_result.value: - print(result) - - -def main(): - rows_number = 2e7 + 1 - - # Request with partial result limit less then full limit - run_query_without_errors( - f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 5 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 3", - support_partial_result=True, - ) - - # Request with partial result limit greater then full limit - run_query_without_errors( - f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 5", - support_partial_result=True, - ) - - # Request with OFFSET - run_query_without_errors( - f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3 OFFSET 1 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 5", - support_partial_result=True, - ) - - # Request with OFFSET greater then partial result limit (partial result pipeline use blocks with less then OFFSET, so there will be no elements in block after LimitPartialResultTransform) - run_query_without_errors( - f"SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3 OFFSET 15 SETTINGS max_threads = 1, partial_result_update_duration_ms = 1, max_rows_in_partial_result = 5", - support_partial_result=False, - ) - - -if __name__ == "__main__": - main() diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference deleted file mode 100644 index dd3a343560f..00000000000 --- a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.reference +++ /dev/null @@ -1,38 +0,0 @@ -Rows 0 Columns 1 -Column number type UInt64 -Rows 3 Columns 1 -Column number type UInt64 -Rows 5 Columns 1 -Column number type UInt64 -20000000 -19999999 -19999998 -19999997 -19999996 -Rows 0 Columns 1 -Column number type UInt64 -Rows 3 Columns 1 -Column number type UInt64 -Rows 3 Columns 1 -Column number type UInt64 -20000000 -19999999 -19999998 -Rows 0 Columns 1 -Column number type UInt64 -Rows 3 Columns 1 -Column number type UInt64 -Rows 3 Columns 1 -Column number type UInt64 -19999999 -19999998 -19999997 -Rows 0 Columns 1 -Column number type UInt64 -Rows 0 Columns 1 -Column number type UInt64 -Rows 3 Columns 1 -Column number type UInt64 -19999985 -19999984 -19999983 diff --git a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh b/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh deleted file mode 100755 index 1ed15197dbf..00000000000 --- 
a/tests/queries/0_stateless/02833_partial_sorting_result_during_query_execution.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/02833_partial_sorting_result_during_query_execution.python diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python deleted file mode 100644 index a33c714e89c..00000000000 --- a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.python +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import os -import sys - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - -from tcp_client import TCPClient - - -def get_keys(results): - return [key for key, _ in results] - - -def check_new_result(new_results, old_results, invariants, rows_limit): - if rows_limit is not None: - assert ( - len(new_results[0].value) <= rows_limit - ), f"Result should have no more then {rows_limit} rows. But it has {len(new_results[0].value)} rows" - - for new_result, old_result in zip(new_results, old_results): - assert ( - new_result.key == old_result.key - ), f"Keys in blocks should be in the same order. Full results keys {get_keys(full_results)}. Partial results keys {get_keys(partial_results)}" - - key = new_result.key - if key in invariants: - new_value = new_result.value - old_value = old_result.value - assert invariants[key]( - old_value, new_value - ), f"Problem with the invariant between new and old result for key: {key}. New value {new_value}. 
Old value {old_value}" - - -def run_query_without_errors( - query, support_partial_result, invariants=None, rows_limit=None -): - if invariants is None: - invariants = {} - - with TCPClient() as client: - client.sendQuery(query, settings={"allow_experimental_partial_result": True}) - - # external tables - client.sendEmptyBlock() - client.readHeader() - - # Partial result - partial_results = client.readDataWithoutProgress() - if support_partial_result: - assert ( - len(partial_results) > 0 and len(partial_results[0].value) > 0 - ), "Expected at least one block with a non-empty partial result before getting the full result" - while True: - new_partial_results = client.readDataWithoutProgress( - need_print_info=False - ) - if len(new_partial_results[0].value) == 0: - break - - check_new_result( - new_partial_results, partial_results, invariants, rows_limit - ) - partial_results = new_partial_results - else: - block_rows = len(partial_results[0].value) - assert ( - block_rows == 0 - ), f"Expected only empty partial result block before getting the full result, but block has {block_rows} rows" - - # Full result - full_results = client.readDataWithoutProgress() - if support_partial_result: - check_new_result(full_results, partial_results, invariants, rows_limit) - - for data in full_results: - if isinstance(data.value[0], int): - print(data.key, data.value) - - -def supported_scenarios_without_key(): - rows_number = 2e7 + 1 - - # Simple aggregation query - query = f"select median(number), stddevSamp(number), stddevPop(number), max(number), min(number), any(number), count(number), avg(number), sum(number) from numbers_mt({rows_number}) settings max_threads = 1, partial_result_update_duration_ms = 1" - invariants = { - "median(number)": lambda old_value, new_value: old_value <= new_value, - "max(number)": lambda old_value, new_value: old_value <= new_value, - "min(number)": lambda old_value, new_value: old_value >= new_value, - "count(number)": lambda old_value, new_value: old_value <= new_value, - "avg(number)": lambda old_value, new_value: old_value <= new_value, - "sum(number)": lambda old_value, new_value: old_value <= new_value, - } - run_query_without_errors( - query, support_partial_result=True, invariants=invariants, rows_limit=1 - ) - - # Aggregation query with a nested ORDER BY subquery - query = f"select median(number), stddevSamp(number), stddevPop(number), max(number), min(number), any(number), count(number), avg(number), sum(number) FROM (SELECT number FROM numbers_mt({rows_number}) ORDER BY -number LIMIT 3) settings max_threads = 1, partial_result_update_duration_ms=1" - - # Aggregation receives small partial result blocks from ORDER BY which always sends blocks with bigger values - invariants["min(number)"] = lambda old_value, new_value: old_value <= new_value - run_query_without_errors( - query, support_partial_result=True, invariants=invariants, rows_limit=1 - ) - - -def unsupported_scenarios(): - rows_number = 2e7 + 1 - - # Currently aggregator for partial result supports only single thread aggregation without key - # Update test when multithreading or aggregation with GROUP BY will be supported for partial result updates - multithread_query = f"select sum(number) from numbers_mt({rows_number}) settings max_threads = 2, partial_result_update_duration_ms = 100" - run_query_without_errors(multithread_query, support_partial_result=False) - - group_with_key_query = f"select mod2, sum(number) from numbers_mt({rows_number}) group by number % 2 as mod2 settings max_threads = 1, 
partial_result_update_duration_ms = 100" - run_query_without_errors(group_with_key_query, support_partial_result=False) - - -def main(): - supported_scenarios_without_key() - unsupported_scenarios() - - -if __name__ == "__main__": - main() diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference deleted file mode 100644 index aea61fad42f..00000000000 --- a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.reference +++ /dev/null @@ -1,88 +0,0 @@ -Rows 0 Columns 9 -Column median(number) type Float64 -Column stddevSamp(number) type Float64 -Column stddevPop(number) type Float64 -Column max(number) type UInt64 -Column min(number) type UInt64 -Column any(number) type UInt64 -Column count(number) type UInt64 -Column avg(number) type Float64 -Column sum(number) type UInt64 -Rows 1 Columns 9 -Column median(number) type Float64 -Column stddevSamp(number) type Float64 -Column stddevPop(number) type Float64 -Column max(number) type UInt64 -Column min(number) type UInt64 -Column any(number) type UInt64 -Column count(number) type UInt64 -Column avg(number) type Float64 -Column sum(number) type UInt64 -Rows 1 Columns 9 -Column median(number) type Float64 -Column stddevSamp(number) type Float64 -Column stddevPop(number) type Float64 -Column max(number) type UInt64 -Column min(number) type UInt64 -Column any(number) type UInt64 -Column count(number) type UInt64 -Column avg(number) type Float64 -Column sum(number) type UInt64 -max(number) [20000000] -min(number) [0] -any(number) [0] -count(number) [20000001] -sum(number) [200000010000000] -Rows 0 Columns 9 -Column median(number) type Float64 -Column stddevSamp(number) type Float64 -Column stddevPop(number) type Float64 -Column max(number) type UInt64 -Column min(number) type UInt64 -Column any(number) type UInt64 -Column count(number) type UInt64 -Column avg(number) type Float64 -Column sum(number) type UInt64 -Rows 1 Columns 9 -Column median(number) type Float64 -Column stddevSamp(number) type Float64 -Column stddevPop(number) type Float64 -Column max(number) type UInt64 -Column min(number) type UInt64 -Column any(number) type UInt64 -Column count(number) type UInt64 -Column avg(number) type Float64 -Column sum(number) type UInt64 -Rows 1 Columns 9 -Column median(number) type Float64 -Column stddevSamp(number) type Float64 -Column stddevPop(number) type Float64 -Column max(number) type UInt64 -Column min(number) type UInt64 -Column any(number) type UInt64 -Column count(number) type UInt64 -Column avg(number) type Float64 -Column sum(number) type UInt64 -max(number) [20000000] -min(number) [19999998] -any(number) [20000000] -count(number) [3] -sum(number) [59999997] -Rows 0 Columns 1 -Column sum(number) type UInt64 -Rows 0 Columns 1 -Column sum(number) type UInt64 -Rows 1 Columns 1 -Column sum(number) type UInt64 -sum(number) [200000010000000] -Rows 0 Columns 2 -Column mod2 type UInt8 -Column sum(number) type UInt64 -Rows 0 Columns 2 -Column mod2 type UInt8 -Column sum(number) type UInt64 -Rows 2 Columns 2 -Column mod2 type UInt8 -Column sum(number) type UInt64 -mod2 [0, 1] -sum(number) [100000010000000, 100000000000000] diff --git a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh b/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh deleted file mode 100755 index e70a3c53ec4..00000000000 --- 
a/tests/queries/0_stateless/02834_partial_aggregating_result_during_query_execution.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/02834_partial_aggregating_result_during_query_execution.python diff --git a/tests/queries/0_stateless/02841_check_table_progress.reference b/tests/queries/0_stateless/02841_check_table_progress.reference new file mode 100644 index 00000000000..541dab48def --- /dev/null +++ b/tests/queries/0_stateless/02841_check_table_progress.reference @@ -0,0 +1,2 @@ +Ok +Ok diff --git a/tests/queries/0_stateless/02841_check_table_progress.sh b/tests/queries/0_stateless/02841_check_table_progress.sh new file mode 100755 index 00000000000..166386b999b --- /dev/null +++ b/tests/queries/0_stateless/02841_check_table_progress.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t0"; +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t0 (x UInt64, val String) ENGINE = MergeTree ORDER BY x PARTITION BY x % 100"; +${CLICKHOUSE_CLIENT} -q "INSERT INTO t0 SELECT sipHash64(number), randomPrintableASCII(1000) FROM numbers(1000)"; + + +# Check that we have at least 3 different values for read_rows +UNIQUE_VALUES=$( + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "CHECK TABLE t0" -v |& { + grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: | grep -o '"read_rows"\s*:\s*"[0-9]*"' + } | uniq | wc -l +) + +[ "$UNIQUE_VALUES" -ge "3" ] && echo "Ok" || echo "Fail: got $UNIQUE_VALUES" + + +# Check that we have at least 100 total_rows_to_read (at least one check task per partition) +MAX_TOTAL_VALUE=$( + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d @- <<< "CHECK TABLE t0" -v |& { + grep -F -e X-ClickHouse-Progress: -e X-ClickHouse-Summary: | grep -o '"total_rows_to_read"\s*:\s*"[0-9]*"' | grep -o '[0-9]*' + } | sort -n | tail -1 +) + +[ "$MAX_TOTAL_VALUE" -ge "100" ] && echo "Ok" || echo "Fail: got $MAX_TOTAL_VALUE" diff --git a/tests/queries/0_stateless/02845_threads_count_in_distributed_queries.reference b/tests/queries/0_stateless/02845_threads_count_in_distributed_queries.reference new file mode 100644 index 00000000000..c8338ebaf7c --- /dev/null +++ b/tests/queries/0_stateless/02845_threads_count_in_distributed_queries.reference @@ -0,0 +1,15 @@ +prefer_localhost_replica=1, remote query with a lot of union all +77 +ok +prefer_localhost_replica=0, remote query with a lot of union all +77 +ok +prefer_localhost_replica=1, async_socket_for_remote=0, remote query with a lot of union all (lot of threads) +77 +ok +prepare test schema +95 +prefer_localhost_replica=1, remote query with read in order +ok +prefer_localhost_replica=1 + async_socket_for_remote=0, remote query with read in order (lot of threads) +ok diff --git a/tests/queries/0_stateless/02845_threads_count_in_distributed_queries.sql.j2 b/tests/queries/0_stateless/02845_threads_count_in_distributed_queries.sql.j2 new file mode 100644 index 00000000000..ffdd4e3400e --- /dev/null +++ b/tests/queries/0_stateless/02845_threads_count_in_distributed_queries.sql.j2 @@ -0,0 +1,229 @@ +-- enforce some defaults to be
sure that the env settings will not affect the test +SET max_threads=5, async_socket_for_remote=1, prefer_localhost_replica=1, optimize_read_in_order=1, load_marks_asynchronously=0, local_filesystem_read_method='pread', remote_filesystem_read_method='read'; + +-- we use query_thread_log to check peak thread usage +-- after https://github.com/ClickHouse/ClickHouse/issues/53417 there is a simpler way to check it +-- but that will not allow backporting the test to older versions +SET log_query_threads=1; + + +-------------------- +SELECT 'prefer_localhost_replica=1, remote query with a lot of union all' AS testname; + +-- a query with a lot of dummy union all will create a lot of streams +-- let's check how many threads clickhouse will start for that + +select count() from remote('127.0.0.1:9000', view( +{% for n in range(77) -%} +SELECT * FROM system.one {{ "UNION ALL" if not loop.last }} +{% endfor -%} + )) SETTINGS log_comment='check_concurrency_in_remote_queries1'; + +SYSTEM FLUSH LOGS; + +WITH + maxIntersections( + toUnixTimestamp64Micro(query_start_time_microseconds), + toUnixTimestamp64Micro(event_time_microseconds) + ) as peak_threads +SELECT + if(peak_threads BETWEEN 1 AND toUInt64(getSetting('max_threads')) + 2, 'ok', 'too many threads: ' || toString(peak_threads) ) AS result +FROM system.query_thread_log +WHERE + event_time > now() - 60 + AND query_id = ( + SELECT query_id + FROM system.query_log + WHERE + type = 'QueryFinish' + AND event_time > now() - 60 + AND log_comment = 'check_concurrency_in_remote_queries1' + AND current_database = currentDatabase() + ORDER BY event_time DESC LIMIT 1 + ); + +-------------------- +SELECT 'prefer_localhost_replica=0, remote query with a lot of union all' AS testname; + +select count() from remote('127.0.0.1:9000', view( +{% for n in range(77) -%} +SELECT * FROM system.one {{ "UNION ALL" if not loop.last }} +{% endfor -%} + )) SETTINGS log_comment='check_concurrency_in_remote_queries2', prefer_localhost_replica=0; + +SYSTEM FLUSH LOGS; + +WITH + maxIntersections( + toUnixTimestamp64Micro(query_start_time_microseconds), + toUnixTimestamp64Micro(event_time_microseconds) + ) as peak_threads +SELECT + if(peak_threads BETWEEN 1 AND toUInt64(getSetting('max_threads')) + 2, 'ok', 'too many threads: ' || toString(peak_threads) ) AS result +FROM system.query_thread_log +WHERE + event_time > now() - 60 + AND query_id = ( + SELECT query_id + FROM system.query_log + WHERE + type = 'QueryFinish' + AND event_time > now() - 60 + AND log_comment = 'check_concurrency_in_remote_queries2' + AND current_database = currentDatabase() + ORDER BY event_time DESC LIMIT 1 + ); + +-------------------- +SELECT 'prefer_localhost_replica=1, async_socket_for_remote=0, remote query with a lot of union all (lot of threads)' AS testname; + +-- that is actually a bad behaviour, but it used to work like that for a long time.
+-- now it happens only for async_socket_for_remote=0 (while it is 1 by default) +-- see https://github.com/ClickHouse/ClickHouse/issues/53287 + +select count() from remote('127.0.0.1:9000', view( +{% for n in range(77) -%} +SELECT * FROM system.one {{ "UNION ALL" if not loop.last }} +{% endfor -%} + )) SETTINGS log_comment='check_concurrency_in_remote_queries3', async_socket_for_remote=0, prefer_localhost_replica=1; + +SYSTEM FLUSH LOGS; + +WITH + maxIntersections( + toUnixTimestamp64Micro(query_start_time_microseconds), + toUnixTimestamp64Micro(event_time_microseconds) + ) as peak_threads +SELECT + if(peak_threads >= 77, 'ok', 'too few threads: ' || toString(peak_threads) ) AS result +FROM system.query_thread_log +WHERE + event_time > now() - 60 + AND query_id = ( + SELECT query_id + FROM system.query_log + WHERE + type = 'QueryFinish' + AND event_time > now() - 60 + AND log_comment = 'check_concurrency_in_remote_queries3' + AND current_database = currentDatabase() + ORDER BY event_time DESC LIMIT 1 + ); + +-- a less synthetic testcase from the issue https://github.com/ClickHouse/ClickHouse/issues/53287 +-- it creates a lot of streams because of many parts (one per part) + optimize_read_in_order=1 feature + +SELECT 'prepare test schema' AS stage; + +DROP TABLE IF EXISTS test_lot_of_parts_distributed; +DROP TABLE IF EXISTS test_lot_of_parts; + +CREATE TABLE test_lot_of_parts +( + `a` String, + `b` LowCardinality(String), + `c` DateTime64(3), + `val` String, +) +ENGINE = MergeTree +ORDER BY (a, b, c) +SETTINGS parts_to_delay_insert=0; + +CREATE TABLE test_lot_of_parts_distributed +( + `a` String, + `b` LowCardinality(String), + `c` DateTime64(3), + `val` String, +) +ENGINE = Distributed(test_shard_localhost, currentDatabase(), 'test_lot_of_parts', rand()); + +-- we need a lot of parts to make sure that we will have a lot of streams +SYSTEM STOP MERGES test_lot_of_parts; +INSERT INTO test_lot_of_parts (a, b, c, val) + SELECT + 'foo' as a, + 'bar' as b, + _CAST('1683504000', 'DateTime64') as c, + 'baz' as val + FROM numbers_mt(95) + SETTINGS max_block_size = 1, min_insert_block_size_bytes=1, min_insert_block_size_rows=1; -- every row will be in a separate part + +select count() from system.parts where table = 'test_lot_of_parts' and active and database = currentDatabase(); + +SELECT 'prefer_localhost_replica=1, remote query with read in order' AS testname; + +-- query which uses optimize_read_in_order=1 +SELECT DISTINCT + 'val' AS fieldType, + val AS value +FROM test_lot_of_parts_distributed +WHERE a = 'foo' AND value != '' AND positionCaseInsensitiveUTF8(value, 'baz') > 0 AND b = 'bar' AND c >= _CAST('1683504000', 'DateTime64') +ORDER BY c DESC +LIMIT 5 +SETTINGS log_comment='check_concurrency_in_remote_queries4' FORMAT Null; + +SYSTEM FLUSH LOGS; + +WITH + maxIntersections( + toUnixTimestamp64Micro(query_start_time_microseconds), + toUnixTimestamp64Micro(event_time_microseconds) + ) as peak_threads +SELECT + if(peak_threads BETWEEN 1 AND toUInt64(getSetting('max_threads')) + 2, 'ok', 'too many threads: ' || toString(peak_threads) ) AS result +FROM system.query_thread_log +WHERE + event_time > now() - 60 + AND query_id = ( + SELECT query_id + FROM system.query_log + WHERE + type = 'QueryFinish' + AND event_time > now() - 60 + AND log_comment = 'check_concurrency_in_remote_queries4' + AND current_database = currentDatabase() + ORDER BY event_time DESC LIMIT 1 + ); + + +SELECT 'prefer_localhost_replica=1 + async_socket_for_remote=0, remote query with read in order (lot of threads)' AS testname; +
+-- that is actually a bad behaviour, but it used to work like that for a long time. +-- now it happens only for async_socket_for_remote=0 (while it is 1 by default) + +SELECT DISTINCT + 'val' AS fieldType, + val AS value +FROM test_lot_of_parts_distributed +WHERE a = 'foo' AND value != '' AND positionCaseInsensitiveUTF8(value, 'baz') > 0 AND b = 'bar' AND c >= _CAST('1683504000', 'DateTime64') +ORDER BY c DESC +LIMIT 5 +SETTINGS log_comment='check_concurrency_in_remote_queries5', async_socket_for_remote=0 FORMAT Null; + +SYSTEM FLUSH LOGS; + +WITH + maxIntersections( + toUnixTimestamp64Micro(query_start_time_microseconds), + toUnixTimestamp64Micro(event_time_microseconds) + ) as peak_threads +SELECT + if(peak_threads >= 95, 'ok', 'too few threads: ' || toString(peak_threads) ) AS result + -- we have 95 parts +FROM system.query_thread_log +WHERE + event_time > now() - 60 + AND query_id = ( + SELECT query_id + FROM system.query_log + WHERE + type = 'QueryFinish' + AND event_time > now() - 60 + AND log_comment = 'check_concurrency_in_remote_queries5' + AND current_database = currentDatabase() + ORDER BY event_time DESC LIMIT 1 + ); + +DROP TABLE IF EXISTS test_lot_of_parts_distributed; +DROP TABLE IF EXISTS test_lot_of_parts; diff --git a/tests/queries/0_stateless/02874_json_merge_patch_function_test.reference b/tests/queries/0_stateless/02874_json_merge_patch_function_test.reference new file mode 100644 index 00000000000..d7de5819d77 --- /dev/null +++ b/tests/queries/0_stateless/02874_json_merge_patch_function_test.reference @@ -0,0 +1,20 @@ +\N +{"a":1} +{"a":1,"b":1} +{"a":1,"b":1,"c":[1,2]} +{"a":1,"b":1,"c":[{"d":1},2]} +{"a":1,"name":"zoey"} +{"a":"1","b":2,"c":"1"} +{"a":{"b":[3,4],"c":2}} +{"a":5,"b":2,"c":4,"d":6} +{"a":1,"b":null} +{"k0":0,"k1":1} +{"k2":2,"k3":3} +{"k4":4,"k5":5} +{"k6":6,"k7":7} +{"k8":8,"k9":9} +{"k10":222,"k11":11} +{"k12":222,"k13":13} +{"k14":222,"k15":15} +{"k16":222,"k17":17} +{"k18":222,"k19":19} diff --git a/tests/queries/0_stateless/02874_json_merge_patch_function_test.sql b/tests/queries/0_stateless/02874_json_merge_patch_function_test.sql new file mode 100644 index 00000000000..9f0c3fd48cd --- /dev/null +++ b/tests/queries/0_stateless/02874_json_merge_patch_function_test.sql @@ -0,0 +1,24 @@ +-- Tags: no-fasttest +select jsonMergePatch(null); +select jsonMergePatch('{"a":1}'); +select jsonMergePatch('{"a":1}', '{"b":1}'); +select jsonMergePatch('{"a":1}', '{"b":1}', '{"c":[1,2]}'); +select jsonMergePatch('{"a":1}', '{"b":1}', '{"c":[{"d":1},2]}'); +select jsonMergePatch('{"a":1}','{"name": "joey"}','{"name": "tom"}','{"name": "zoey"}'); +select jsonMergePatch('{"a": "1","b": 2,"c": [true,{"qrdzkzjvnos": true,"yxqhipj": false,"oesax": "33o8_6AyUy"}]}', '{"c": "1"}'); +select jsonMergePatch('{"a": {"b": 1, "c": 2}}', '{"a": {"b": [3, 4]}}'); +select jsonMergePatch('{ "a": 1, "b":2 }','{ "a": 3, "c":4 }','{ "a": 5, "d":6 }'); +select jsonMergePatch('{"a":1, "b":2}', '{"b":null}'); + +select jsonMergePatch('[1]'); -- { serverError BAD_ARGUMENTS } +select jsonMergePatch('{"a": "1","b": 2,"c": [true,"qrdzkzjvnos": true,"yxqhipj": false,"oesax": "33o8_6AyUy"}]}', '{"c": "1"}'); -- { serverError BAD_ARGUMENTS } + +drop table if exists t_json_merge; +create table t_json_merge (id UInt64, s1 String, s2 String) engine = Memory; + +insert into t_json_merge select number, format('{{ "k{0}": {0} }}', toString(number * 2)), format('{{ "k{0}": {0} }}', toString(number * 2 + 1)) from numbers(5); +insert into t_json_merge select number, format('{{ "k{0}": {0} }}',
toString(number * 2)), format('{{ "k{0}": {0}, "k{1}": 222 }}', toString(number * 2 + 1), toString(number * 2)) from numbers(5, 5); + +select jsonMergePatch(s1, s2) from t_json_merge ORDER BY id; + +drop table t_json_merge; diff --git a/tests/queries/0_stateless/02876_experimental_partial_result.sql b/tests/queries/0_stateless/02876_experimental_partial_result.sql deleted file mode 100644 index 8418f07c750..00000000000 --- a/tests/queries/0_stateless/02876_experimental_partial_result.sql +++ /dev/null @@ -1,4 +0,0 @@ - -SET partial_result_update_duration_ms = 10; - -SELECT sum(number) FROM numbers_mt(100_000) SETTINGS max_threads = 1; -- { serverError FUNCTION_NOT_ALLOWED } diff --git a/tests/queries/0_stateless/02889_datetime64_from_string.reference b/tests/queries/0_stateless/02889_datetime64_from_string.reference new file mode 100644 index 00000000000..e6e2208ed4c --- /dev/null +++ b/tests/queries/0_stateless/02889_datetime64_from_string.reference @@ -0,0 +1,3 @@ +1969-12-31 23:57:57.000 +1970-01-01 00:00:23.900 +1969-12-31 23:59:36.100 diff --git a/tests/queries/0_stateless/02889_datetime64_from_string.sql b/tests/queries/0_stateless/02889_datetime64_from_string.sql new file mode 100644 index 00000000000..50c29de19bd --- /dev/null +++ b/tests/queries/0_stateless/02889_datetime64_from_string.sql @@ -0,0 +1,5 @@ +SELECT toDateTime64('-123', 3, 'UTC'); -- Allowed: no year starts with '-' +SELECT toDateTime64('23.9', 3, 'UTC'); -- Allowed: no year has a dot in notation +SELECT toDateTime64('-23.9', 3, 'UTC'); -- Allowed + +SELECT toDateTime64('1234', 3, 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.reference b/tests/queries/0_stateless/02892_orc_filter_pushdown.reference new file mode 100644 index 00000000000..9059b403a34 --- /dev/null +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.reference @@ -0,0 +1,227 @@ +number Nullable(Int64) +u8 Nullable(Int8) +u16 Nullable(Int16) +u32 Nullable(Int32) +u64 Nullable(Int64) +i8 Nullable(Int8) +i16 Nullable(Int16) +i32 Nullable(Int32) +i64 Nullable(Int64) +date32 Nullable(Date32) +dt64_ms Nullable(DateTime64(9)) +dt64_us Nullable(DateTime64(9)) +dt64_ns Nullable(DateTime64(9)) +dt64_s Nullable(DateTime64(9)) +dt64_cs Nullable(DateTime64(9)) +f32 Nullable(Float32) +f64 Nullable(Float64) +s Nullable(String) +fs Nullable(String) +d32 Nullable(Decimal(9, 3)) +d64 Nullable(Decimal(18, 10)) +d128 Nullable(Decimal(38, 20)) +-- Go over all types individually +-- { echoOn } +select count(), sum(number) from file('02892.orc') where indexHint(u8 in (10, 15, 250)); +800 4229600 +select count(1), min(u8), max(u8) from file('02892.orc') where u8 in (10, 15, 250); +66 10 15 +select count(), sum(number) from file('02892.orc') where indexHint(i8 between -3 and 2); +1000 4999500 +select count(1), min(i8), max(i8) from file('02892.orc') where i8 between -3 and 2; +208 -3 2 +select count(), sum(number) from file('02892.orc') where indexHint(u16 between 4000 and 61000 or u16 == 42); +1800 6479100 +select count(1), min(u16), max(u16) from file('02892.orc') where u16 between 4000 and 61000 or u16 == 42; +1002 42 5000 +select count(), sum(number) from file('02892.orc') where indexHint(i16 between -150 and 250); +500 2474750 +select count(1), min(i16), max(i16) from file('02892.orc') where i16 between -150 and 250; +401 -150 250 +select count(), sum(number) from file('02892.orc') where indexHint(u32 in (42, 4294966296)); +200 999900 +select count(1), min(u32), max(u32) from file('02892.orc') where 
u32 in (42, 4294966296); +1 42 42 +select count(), sum(number) from file('02892.orc') where indexHint(i32 between -150 and 250); +500 2474750 +select count(1), min(i32), max(i32) from file('02892.orc') where i32 between -150 and 250; +401 -150 250 +select count(), sum(number) from file('02892.orc') where indexHint(u64 in (42, 18446744073709550616)); +100 494950 +select count(1), min(u64), max(u64) from file('02892.orc') where u64 in (42, 18446744073709550616); +1 42 42 +select count(), sum(number) from file('02892.orc') where indexHint(i64 between -150 and 250); +500 2474750 +select count(1), min(i64), max(i64) from file('02892.orc') where i64 between -150 and 250; +401 -150 250 +select count(), sum(number) from file('02892.orc') where indexHint(date32 between '1992-01-01' and '2023-08-02'); +2100 5563950 +select count(1), min(date32), max(date32) from file('02892.orc') where date32 between '1992-01-01' and '2023-08-02'; +1994 1992-01-04 2023-08-02 +select count(), sum(number) from file('02892.orc') where indexHint(dt64_ms between '2000-01-01' and '2005-01-01'); +300 1184850 +select count(1), min(dt64_ms), max(dt64_ms) from file('02892.orc') where dt64_ms between '2000-01-01' and '2005-01-01'; +158 2000-01-04 15:33:20.000000000 2004-12-25 18:40:00.000000000 +select count(), sum(number) from file('02892.orc') where indexHint(dt64_us between toDateTime64(900000000, 2) and '2005-01-01'); +400 1599800 +select count(1), min(dt64_us), max(dt64_us) from file('02892.orc') where (dt64_us between toDateTime64(900000000, 2) and '2005-01-01'); +205 1998-07-09 16:00:00.000000000 2004-12-25 18:40:00.000000000 +select count(), sum(number) from file('02892.orc') where indexHint(dt64_ns between '2000-01-01' and '2005-01-01'); +300 1184850 +select count(1), min(dt64_ns), max(dt64_ns) from file('02892.orc') where (dt64_ns between '2000-01-01' and '2005-01-01'); +158 2000-01-04 15:33:20.000000000 2004-12-25 18:40:00.000000000 +select count(), sum(number) from file('02892.orc') where indexHint(dt64_s between toDateTime64('-2.01e8'::Decimal64(0), 0) and toDateTime64(1.5e8::Decimal64(0), 0)); +500 2524750 +select count(1), min(dt64_s), max(dt64_s) from file('02892.orc') where (dt64_s between toDateTime64('-2.01e8'::Decimal64(0), 0) and toDateTime64(1.5e8::Decimal64(0), 0)); +352 1963-08-19 14:40:00.000000000 1974-10-03 02:40:00.000000000 +select count(), sum(number) from file('02892.orc') where indexHint(dt64_cs between toDateTime64('-2.01e8'::Decimal64(1), 1) and toDateTime64(1.5e8::Decimal64(2), 2)); +500 2524750 +select count(1), min(dt64_cs), max(dt64_cs) from file('02892.orc') where (dt64_cs between toDateTime64('-2.01e8'::Decimal64(1), 1) and toDateTime64(1.5e8::Decimal64(2), 2)); +352 1963-08-19 14:40:00.000000000 1974-10-03 02:40:00.000000000 +select count(), sum(number) from file('02892.orc') where indexHint(f32 between -0.11::Float32 and 0.06::Float32); +300 1514850 +select count(1), min(f32), max(f32) from file('02892.orc') where (f32 between -0.11::Float32 and 0.06::Float32); +171 -0.11 0.06 +select count(), sum(number) from file('02892.orc') where indexHint(f64 between -0.11 and 0.06); +300 1514850 +select count(1), min(f64), max(f64) from file('02892.orc') where (f64 between -0.11 and 0.06); +171 -0.11 0.06 +select count(), sum(number) from file('02892.orc') where indexHint(s between '-9' and '1!!!'); +300 1594850 +select count(1), min(s), max(s) from file('02892.orc') where (s between '-9' and '1!!!'); +113 -9 1 +select count(), sum(number) from file('02892.orc') where indexHint(fs between '-9' 
and '1!!!'); +300 1594850 +select count(1), min(fs), max(fs) from file('02892.orc') where (fs between '-9' and '1!!!'); +113 -9\0\0\0\0\0\0\0 1\0\0\0\0\0\0\0\0 +select count(), sum(number) from file('02892.orc') where indexHint(d32 between '-0.011'::Decimal32(3) and 0.006::Decimal32(3)); +200 999900 +select count(1), min(d32), max(d32) from file('02892.orc') where (d32 between '-0.011'::Decimal32(3) and 0.006::Decimal32(3)); +23 -0.011 0.006 +select count(), sum(number) from file('02892.orc') where indexHint(d64 between '-0.0000011'::Decimal64(7) and 0.0000006::Decimal64(9)); +200 999900 +select count(1), min(d64), max(d64) from file('02892.orc') where (d64 between '-0.0000011'::Decimal64(7) and 0.0000006::Decimal64(9)); +21 -0.000001053 0.000000567 +select count(), sum(number) from file('02892.orc') where indexHint(d128 between '-0.00000000000011'::Decimal128(20) and 0.00000000000006::Decimal128(20)); +200 999900 +select count(1), min(d128), max(128) from file('02892.orc') where (d128 between '-0.00000000000011'::Decimal128(20) and 0.00000000000006::Decimal128(20)); +21 -0.0000000000001053 128 +-- Some random other cases. +select count(), sum(number) from file('02892.orc') where indexHint(0); +0 \N +select count(), min(number), max(number) from file('02892.orc') where indexHint(0); +0 \N \N +select count(), sum(number) from file('02892.orc') where indexHint(s like '99%' or u64 == 2000); +300 1204850 +select count(), min(s), max(s) from file('02892.orc') where (s like '99%' or u64 == 2000); +12 2000 999 +select count(), sum(number) from file('02892.orc') where indexHint(s like 'z%'); +0 \N +select count(), min(s), max(s) from file('02892.orc') where (s like 'z%'); +0 \N \N +select count(), sum(number) from file('02892.orc') where indexHint(u8 == 10 or 1 == 1); +10000 49995000 +select count(), min(u8), max(u8) from file('02892.orc') where (u8 == 10 or 1 == 1); +10000 -128 127 +select count(), sum(number) from file('02892.orc') where indexHint(u8 < 0); +5300 26042350 +select count(), min(u8), max(u8) from file('02892.orc') where (u8 < 0); +5001 -128 -1 +-- { echoOn } +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null is NULL); +500 244750 +select count(), min(sometimes_null), max(sometimes_null) from file('02892.orc') where (sometimes_null is NULL); +5 \N \N +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null_lc is NULL); +500 244750 +select count(), min(sometimes_null_lc), max(sometimes_null_lc) from file('02892.orc') where (sometimes_null_lc is NULL); +5 \N \N +select count(), sum(number) from file('02892.orc') where indexHint(mostly_null is not NULL); +300 104850 +select count(), min(mostly_null), max(mostly_null) from file('02892.orc') where (mostly_null is not NULL); +3 0 690 +select count(), sum(number) from file('02892.orc') where indexHint(mostly_null_lc is not NULL); +300 104850 +select count(), min(mostly_null_lc), max(mostly_null_lc) from file('02892.orc') where (mostly_null_lc is not NULL); +3 0 690 +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null > 850); +200 179900 +select count(), min(sometimes_null), max(sometimes_null) from file('02892.orc') where (sometimes_null > 850); +148 851 999 +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null_lc > 850); +200 179900 +select count(), min(sometimes_null_lc), max(sometimes_null_lc) from file('02892.orc') where (sometimes_null_lc > 850); +148 851 999 +select count(), sum(number) from file('02892.orc') where 
indexHint(never_null > 850); +200 179900 +select count(), min(never_null), max(never_null) from file('02892.orc') where (never_null > 850); +149 851 999 +select count(), sum(number) from file('02892.orc') where indexHint(never_null_lc > 850); +200 179900 +select count(), min(never_null_lc), max(never_null_lc) from file('02892.orc') where (never_null_lc > 850); +149 851 999 +select count(), sum(number) from file('02892.orc') where indexHint(never_null < 150); +200 19900 +select count(), min(never_null), max(never_null) from file('02892.orc') where (never_null < 150); +150 0 149 +select count(), sum(number) from file('02892.orc') where indexHint(never_null_lc < 150); +200 19900 +select count(), min(never_null_lc), max(never_null_lc) from file('02892.orc') where (never_null_lc < 150); +150 0 149 +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null < 150); +200 19900 +select count(), min(sometimes_null), max(sometimes_null) from file('02892.orc') where (sometimes_null < 150); +149 1 149 +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null_lc < 150); +200 19900 +select count(), min(sometimes_null_lc), max(sometimes_null_lc) from file('02892.orc') where (sometimes_null_lc < 150); +149 1 149 +-- { echoOn } +select count(), sum(number) from file('02892.orc') where indexHint(positive_or_null < 50); -- quirk with infinities +0 \N +select count(), min(positive_or_null), max(positive_or_null) from file('02892.orc') where (positive_or_null < 50); +0 \N \N +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, positive_or_null UInt64') where indexHint(positive_or_null < 50); +1000 499500 +select count(), min(positive_or_null), max(positive_or_null) from file('02892.orc', ORC, 'number UInt64, positive_or_null UInt64') where (positive_or_null < 50); +5 0 0 +select count(), sum(number) from file('02892.orc') where indexHint(negative_or_null > -50); +0 \N +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc') where (negative_or_null > -50); +0 \N \N +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null > -50); +1000 499500 +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null > -50); +5 0 0 +select count(), sum(number) from file('02892.orc') where indexHint(string_or_null == ''); -- quirk with infinities +0 \N +select count(), min(string_or_null), max(string_or_null) from file('02892.orc') where (string_or_null == ''); +0 \N \N +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, string_or_null String') where indexHint(string_or_null == ''); +1000 499500 +select count(), min(string_or_null), max(string_or_null) from file('02892.orc', ORC, 'number UInt64, string_or_null String') where (string_or_null == ''); +5 +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where indexHint(nEgAtIvE_oR_nUlL > -50) settings input_format_orc_case_insensitive_column_matching = 1; +1000 499500 +select count(), min(nEgAtIvE_oR_nUlL), max(nEgAtIvE_oR_nUlL) from file('02892.orc', ORC, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where (nEgAtIvE_oR_nUlL > -50) settings input_format_orc_case_insensitive_column_matching = 1; +5 0 0 +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null < -500); +600 419700 +select 
count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null < -500); +596 -1099 -501 +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null); +1000 499500 +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null is null); +0 0 0 +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null in (0, -1, -10, -100, -1000)); +1000 499500 +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null in (0, -1, -10, -100, -1000)); +6 -1000 0 +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(String)') where indexHint(string_or_null like 'I am%'); +1000 499500 +select count(), min(string_or_null), max(string_or_null) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(String)') where (string_or_null like 'I am%'); +995 I am a string I am a string +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(Nullable(String))') where indexHint(string_or_null like 'I am%'); +1000 499500 +select count(), min(string_or_null), max(string_or_null) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(Nullable(String))') where (string_or_null like 'I am%'); +995 I am a string I am a string diff --git a/tests/queries/0_stateless/02892_orc_filter_pushdown.sql b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql new file mode 100644 index 00000000000..d319252f592 --- /dev/null +++ b/tests/queries/0_stateless/02892_orc_filter_pushdown.sql @@ -0,0 +1,229 @@ +-- Tags: no-fasttest, no-parallel, no-cpu-aarch64 + +set output_format_orc_string_as_string = 1; +set output_format_orc_row_index_stride = 100; +set input_format_orc_row_batch_size = 100; +set input_format_orc_filter_push_down = 1; +set input_format_null_as_default = 1; + +set engine_file_truncate_on_insert = 1; +set optimize_or_like_chain = 0; +set max_block_size = 100000; +set max_insert_threads = 1; + +SET session_timezone = 'UTC'; + +-- Analyzer breaks the queries with IN and some queries with BETWEEN. +set allow_experimental_analyzer=0; + + +-- Try all the types. +insert into function file('02892.orc') + -- Use negative numbers to test sign extension for signed types and lack of sign extension for + -- unsigned types. 
+ with 5000 - number as n +select + number, + intDiv(n, 11)::UInt8 as u8, + n::UInt16 u16, + n::UInt32 as u32, + n::UInt64 as u64, + intDiv(n, 11)::Int8 as i8, + n::Int16 i16, + n::Int32 as i32, + n::Int64 as i64, + + toDate32(n*500000) as date32, + toDateTime64(n*1e6, 3) as dt64_ms, + toDateTime64(n*1e6, 6) as dt64_us, + toDateTime64(n*1e6, 9) as dt64_ns, + toDateTime64(n*1e6, 0) as dt64_s, + toDateTime64(n*1e6, 2) as dt64_cs, + (n/1000)::Float32 as f32, + (n/1000)::Float64 as f64, + n::String as s, + n::String::FixedString(9) as fs, + n::Decimal32(3)/1234 as d32, + n::Decimal64(10)/12345678 as d64, + n::Decimal128(20)/123456789012345 as d128 + from numbers(10000); + +desc file('02892.orc'); + + +-- Go over all types individually +-- { echoOn } +select count(), sum(number) from file('02892.orc') where indexHint(u8 in (10, 15, 250)); +select count(1), min(u8), max(u8) from file('02892.orc') where u8 in (10, 15, 250); + +select count(), sum(number) from file('02892.orc') where indexHint(i8 between -3 and 2); +select count(1), min(i8), max(i8) from file('02892.orc') where i8 between -3 and 2; + +select count(), sum(number) from file('02892.orc') where indexHint(u16 between 4000 and 61000 or u16 == 42); +select count(1), min(u16), max(u16) from file('02892.orc') where u16 between 4000 and 61000 or u16 == 42; + +select count(), sum(number) from file('02892.orc') where indexHint(i16 between -150 and 250); +select count(1), min(i16), max(i16) from file('02892.orc') where i16 between -150 and 250; + +select count(), sum(number) from file('02892.orc') where indexHint(u32 in (42, 4294966296)); +select count(1), min(u32), max(u32) from file('02892.orc') where u32 in (42, 4294966296); + +select count(), sum(number) from file('02892.orc') where indexHint(i32 between -150 and 250); +select count(1), min(i32), max(i32) from file('02892.orc') where i32 between -150 and 250; + +select count(), sum(number) from file('02892.orc') where indexHint(u64 in (42, 18446744073709550616)); +select count(1), min(u64), max(u64) from file('02892.orc') where u64 in (42, 18446744073709550616); + +select count(), sum(number) from file('02892.orc') where indexHint(i64 between -150 and 250); +select count(1), min(i64), max(i64) from file('02892.orc') where i64 between -150 and 250; + +select count(), sum(number) from file('02892.orc') where indexHint(date32 between '1992-01-01' and '2023-08-02'); +select count(1), min(date32), max(date32) from file('02892.orc') where date32 between '1992-01-01' and '2023-08-02'; + +select count(), sum(number) from file('02892.orc') where indexHint(dt64_ms between '2000-01-01' and '2005-01-01'); +select count(1), min(dt64_ms), max(dt64_ms) from file('02892.orc') where dt64_ms between '2000-01-01' and '2005-01-01'; + +select count(), sum(number) from file('02892.orc') where indexHint(dt64_us between toDateTime64(900000000, 2) and '2005-01-01'); +select count(1), min(dt64_us), max(dt64_us) from file('02892.orc') where (dt64_us between toDateTime64(900000000, 2) and '2005-01-01'); + +select count(), sum(number) from file('02892.orc') where indexHint(dt64_ns between '2000-01-01' and '2005-01-01'); +select count(1), min(dt64_ns), max(dt64_ns) from file('02892.orc') where (dt64_ns between '2000-01-01' and '2005-01-01'); + +select count(), sum(number) from file('02892.orc') where indexHint(dt64_s between toDateTime64('-2.01e8'::Decimal64(0), 0) and toDateTime64(1.5e8::Decimal64(0), 0)); +select count(1), min(dt64_s), max(dt64_s) from file('02892.orc') where (dt64_s between 
toDateTime64('-2.01e8'::Decimal64(0), 0) and toDateTime64(1.5e8::Decimal64(0), 0)); + +select count(), sum(number) from file('02892.orc') where indexHint(dt64_cs between toDateTime64('-2.01e8'::Decimal64(1), 1) and toDateTime64(1.5e8::Decimal64(2), 2)); +select count(1), min(dt64_cs), max(dt64_cs) from file('02892.orc') where (dt64_cs between toDateTime64('-2.01e8'::Decimal64(1), 1) and toDateTime64(1.5e8::Decimal64(2), 2)); + +select count(), sum(number) from file('02892.orc') where indexHint(f32 between -0.11::Float32 and 0.06::Float32); +select count(1), min(f32), max(f32) from file('02892.orc') where (f32 between -0.11::Float32 and 0.06::Float32); + +select count(), sum(number) from file('02892.orc') where indexHint(f64 between -0.11 and 0.06); +select count(1), min(f64), max(f64) from file('02892.orc') where (f64 between -0.11 and 0.06); + +select count(), sum(number) from file('02892.orc') where indexHint(s between '-9' and '1!!!'); +select count(1), min(s), max(s) from file('02892.orc') where (s between '-9' and '1!!!'); + +select count(), sum(number) from file('02892.orc') where indexHint(fs between '-9' and '1!!!'); +select count(1), min(fs), max(fs) from file('02892.orc') where (fs between '-9' and '1!!!'); + +select count(), sum(number) from file('02892.orc') where indexHint(d32 between '-0.011'::Decimal32(3) and 0.006::Decimal32(3)); +select count(1), min(d32), max(d32) from file('02892.orc') where (d32 between '-0.011'::Decimal32(3) and 0.006::Decimal32(3)); + +select count(), sum(number) from file('02892.orc') where indexHint(d64 between '-0.0000011'::Decimal64(7) and 0.0000006::Decimal64(9)); +select count(1), min(d64), max(d64) from file('02892.orc') where (d64 between '-0.0000011'::Decimal64(7) and 0.0000006::Decimal64(9)); + +select count(), sum(number) from file('02892.orc') where indexHint(d128 between '-0.00000000000011'::Decimal128(20) and 0.00000000000006::Decimal128(20)); +select count(1), min(d128), max(128) from file('02892.orc') where (d128 between '-0.00000000000011'::Decimal128(20) and 0.00000000000006::Decimal128(20)); + +-- Some random other cases. +select count(), sum(number) from file('02892.orc') where indexHint(0); +select count(), min(number), max(number) from file('02892.orc') where indexHint(0); + +select count(), sum(number) from file('02892.orc') where indexHint(s like '99%' or u64 == 2000); +select count(), min(s), max(s) from file('02892.orc') where (s like '99%' or u64 == 2000); + +select count(), sum(number) from file('02892.orc') where indexHint(s like 'z%'); +select count(), min(s), max(s) from file('02892.orc') where (s like 'z%'); + +select count(), sum(number) from file('02892.orc') where indexHint(u8 == 10 or 1 == 1); +select count(), min(u8), max(u8) from file('02892.orc') where (u8 == 10 or 1 == 1); + +select count(), sum(number) from file('02892.orc') where indexHint(u8 < 0); +select count(), min(u8), max(u8) from file('02892.orc') where (u8 < 0); +-- { echoOff } + +-- Nullable and LowCardinality. 
+insert into function file('02892.orc') select + number, + if(number%234 == 0, NULL, number) as sometimes_null, + toNullable(number) as never_null, + if(number%345 == 0, number::String, NULL) as mostly_null, + toLowCardinality(if(number%234 == 0, NULL, number)) as sometimes_null_lc, + toLowCardinality(toNullable(number)) as never_null_lc, + toLowCardinality(if(number%345 == 0, number::String, NULL)) as mostly_null_lc + from numbers(1000); + +-- { echoOn } +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null is NULL); +select count(), min(sometimes_null), max(sometimes_null) from file('02892.orc') where (sometimes_null is NULL); + +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null_lc is NULL); +select count(), min(sometimes_null_lc), max(sometimes_null_lc) from file('02892.orc') where (sometimes_null_lc is NULL); + +select count(), sum(number) from file('02892.orc') where indexHint(mostly_null is not NULL); +select count(), min(mostly_null), max(mostly_null) from file('02892.orc') where (mostly_null is not NULL); + +select count(), sum(number) from file('02892.orc') where indexHint(mostly_null_lc is not NULL); +select count(), min(mostly_null_lc), max(mostly_null_lc) from file('02892.orc') where (mostly_null_lc is not NULL); + +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null > 850); +select count(), min(sometimes_null), max(sometimes_null) from file('02892.orc') where (sometimes_null > 850); + +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null_lc > 850); +select count(), min(sometimes_null_lc), max(sometimes_null_lc) from file('02892.orc') where (sometimes_null_lc > 850); + +select count(), sum(number) from file('02892.orc') where indexHint(never_null > 850); +select count(), min(never_null), max(never_null) from file('02892.orc') where (never_null > 850); + +select count(), sum(number) from file('02892.orc') where indexHint(never_null_lc > 850); +select count(), min(never_null_lc), max(never_null_lc) from file('02892.orc') where (never_null_lc > 850); + +select count(), sum(number) from file('02892.orc') where indexHint(never_null < 150); +select count(), min(never_null), max(never_null) from file('02892.orc') where (never_null < 150); + +select count(), sum(number) from file('02892.orc') where indexHint(never_null_lc < 150); +select count(), min(never_null_lc), max(never_null_lc) from file('02892.orc') where (never_null_lc < 150); + +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null < 150); +select count(), min(sometimes_null), max(sometimes_null) from file('02892.orc') where (sometimes_null < 150); + +select count(), sum(number) from file('02892.orc') where indexHint(sometimes_null_lc < 150); +select count(), min(sometimes_null_lc), max(sometimes_null_lc) from file('02892.orc') where (sometimes_null_lc < 150); +-- { echoOff } + +-- Settings that affect the table schema or contents. 
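+-- The file is recreated below and then read both with the inferred schema and with an explicit
+-- structure passed to file(), which overrides schema inference (e.g. reading a Nullable ORC column as
+-- plain UInt64/Int64/String); input_format_orc_case_insensitive_column_matching additionally matches
+-- file columns to the requested names ignoring case. Statistics-based pruning is expected to keep
+-- working in all of these cases.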
+insert into function file('02892.orc') select + number, + if(number%234 == 0, NULL, number + 100) as positive_or_null, + if(number%234 == 0, NULL, -number - 100) as negative_or_null, + if(number%234 == 0, NULL, 'I am a string') as string_or_null + from numbers(1000); + +-- { echoOn } +select count(), sum(number) from file('02892.orc') where indexHint(positive_or_null < 50); -- quirk with infinities +select count(), min(positive_or_null), max(positive_or_null) from file('02892.orc') where (positive_or_null < 50); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, positive_or_null UInt64') where indexHint(positive_or_null < 50); +select count(), min(positive_or_null), max(positive_or_null) from file('02892.orc', ORC, 'number UInt64, positive_or_null UInt64') where (positive_or_null < 50); + +select count(), sum(number) from file('02892.orc') where indexHint(negative_or_null > -50); +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc') where (negative_or_null > -50); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null > -50); +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null > -50); + +select count(), sum(number) from file('02892.orc') where indexHint(string_or_null == ''); -- quirk with infinities +select count(), min(string_or_null), max(string_or_null) from file('02892.orc') where (string_or_null == ''); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, string_or_null String') where indexHint(string_or_null == ''); +select count(), min(string_or_null), max(string_or_null) from file('02892.orc', ORC, 'number UInt64, string_or_null String') where (string_or_null == ''); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where indexHint(nEgAtIvE_oR_nUlL > -50) settings input_format_orc_case_insensitive_column_matching = 1; +select count(), min(nEgAtIvE_oR_nUlL), max(nEgAtIvE_oR_nUlL) from file('02892.orc', ORC, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where (nEgAtIvE_oR_nUlL > -50) settings input_format_orc_case_insensitive_column_matching = 1; + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null < -500); +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null < -500); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null); +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null is null); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null in (0, -1, -10, -100, -1000)); +select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null in (0, -1, -10, -100, -1000)); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(String)') where indexHint(string_or_null like 'I am%'); +select count(), min(string_or_null), max(string_or_null) from file('02892.orc', ORC, 'number UInt64, string_or_null 
LowCardinality(String)') where (string_or_null like 'I am%'); + +select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(Nullable(String))') where indexHint(string_or_null like 'I am%'); +select count(), min(string_or_null), max(string_or_null) from file('02892.orc', ORC, 'number UInt64, string_or_null LowCardinality(Nullable(String))') where (string_or_null like 'I am%'); +-- { echoOff } diff --git a/tests/queries/0_stateless/02892_rocksdb_trivial_count.reference b/tests/queries/0_stateless/02892_rocksdb_trivial_count.reference new file mode 100644 index 00000000000..9289ddcee34 --- /dev/null +++ b/tests/queries/0_stateless/02892_rocksdb_trivial_count.reference @@ -0,0 +1 @@ +121 diff --git a/tests/queries/0_stateless/02892_rocksdb_trivial_count.sql b/tests/queries/0_stateless/02892_rocksdb_trivial_count.sql new file mode 100644 index 00000000000..0cdf2d1b2b2 --- /dev/null +++ b/tests/queries/0_stateless/02892_rocksdb_trivial_count.sql @@ -0,0 +1,6 @@ +-- Tags: use-rocksdb + +CREATE TABLE dict (key UInt64, value String) ENGINE = EmbeddedRocksDB PRIMARY KEY key; +INSERT INTO dict SELECT number, toString(number) FROM numbers(121); +SELECT count() FROM dict SETTINGS optimize_trivial_approximate_count_query = 0, max_rows_to_read = 1; -- { serverError TOO_MANY_ROWS } +SELECT count() FROM dict SETTINGS optimize_trivial_approximate_count_query = 1, max_rows_to_read = 1; diff --git a/tests/queries/0_stateless/02894_MergeSortingPartialResultTransform_empty_block.sql b/tests/queries/0_stateless/02894_MergeSortingPartialResultTransform_empty_block.sql deleted file mode 100644 index 9e665e0ae20..00000000000 --- a/tests/queries/0_stateless/02894_MergeSortingPartialResultTransform_empty_block.sql +++ /dev/null @@ -1,11 +0,0 @@ -drop table if exists data; -create table data (key Int) engine=MergeTree() order by key; -insert into data select * from numbers(1); -insert into data select * from numbers(1); -system stop merges data; --- need sleep to trigger partial results to uncover the bug with empty chunk after remerge due to empty array join, i.e.: --- --- MergeSortingTransform: Re-merging intermediate ORDER BY data (1 blocks with 0 rows) to save memory consumption --- MergeSortingTransform: Memory usage is lowered from 4.26 KiB to 0.00 B --- -select key, sleepEachRow(1) from data array join [] as x order by key settings optimize_read_in_order=0, allow_experimental_partial_result=1, partial_result_update_duration_ms=1, max_threads=1, max_execution_time=0, max_block_size=1; diff --git a/tests/queries/0_stateless/02896_union_distinct_http_format.reference b/tests/queries/0_stateless/02896_union_distinct_http_format.reference new file mode 100644 index 00000000000..3a68ab6dc9f --- /dev/null +++ b/tests/queries/0_stateless/02896_union_distinct_http_format.reference @@ -0,0 +1,7 @@ +┌─1─┐ +│ 1 │ +└───┘ +┌─a─┐ +│ 1 │ +│ 2 │ +└───┘ diff --git a/tests/queries/0_stateless/02896_union_distinct_http_format.sh b/tests/queries/0_stateless/02896_union_distinct_http_format.sh new file mode 100755 index 00000000000..bb35800e39d --- /dev/null +++ b/tests/queries/0_stateless/02896_union_distinct_http_format.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +curl -d@- -sS "${CLICKHOUSE_URL}" <<< 'SELECT 1 UNION DISTINCT SELECT 1 FORMAT PrettyCompactMonoBlock' +curl -d@- -sS "${CLICKHOUSE_URL}" <<< 'SELECT * FROM (SELECT 1 as a UNION DISTINCT SELECT 2 as a) ORDER BY a FORMAT PrettyCompactMonoBlock' diff --git a/tests/queries/0_stateless/02900_add_subtract_interval_with_string_date.reference b/tests/queries/0_stateless/02900_add_subtract_interval_with_string_date.reference new file mode 100644 index 00000000000..fa82844c9f3 --- /dev/null +++ b/tests/queries/0_stateless/02900_add_subtract_interval_with_string_date.reference @@ -0,0 +1,100 @@ +-- const date, const delta + -- add +2024-10-22 00:00:00.000 2024-10-22 00:00:00.000 +2024-01-22 00:00:00.000 2024-01-22 00:00:00.000 +2023-11-22 00:00:00.000 2023-11-22 00:00:00.000 +2023-10-29 00:00:00.000 2023-10-29 00:00:00.000 +2023-10-23 00:00:00.000 2023-10-23 00:00:00.000 +2023-10-22 01:00:00.000 2023-10-22 01:00:00.000 +2023-10-22 00:01:00.000 2023-10-22 00:01:00.000 +2023-10-22 00:00:01.000 2023-10-22 00:00:01.000 +2023-10-22 00:00:00.001 2023-10-22 00:00:00.001 +2023-10-22 00:00:00.000001 2023-10-22 00:00:00.000001 +2023-10-22 00:00:00.000000001 2023-10-22 00:00:00.000000001 + -- subtract +2022-10-22 00:00:00.000 2022-10-22 00:00:00.000 +2023-07-22 00:00:00.000 2023-07-22 00:00:00.000 +2023-09-22 00:00:00.000 2023-09-22 00:00:00.000 +2023-10-15 00:00:00.000 2023-10-15 00:00:00.000 +2023-10-21 00:00:00.000 2023-10-21 00:00:00.000 +2023-10-21 23:00:00.000 2023-10-21 23:00:00.000 +2023-10-21 23:59:00.000 2023-10-21 23:59:00.000 +2023-10-21 23:59:59.000 2023-10-21 23:59:59.000 +2023-10-21 23:59:59.999 2023-10-21 23:59:59.999 +2023-10-21 23:59:59.999999 2023-10-21 23:59:59.999999 +2023-10-21 23:59:59.999999999 2023-10-21 23:59:59.999999999 +-- non-const date, const delta + -- add +2024-10-22 00:00:00.000 2024-10-22 00:00:00.000 +2024-01-22 00:00:00.000 2024-01-22 00:00:00.000 +2023-11-22 00:00:00.000 2023-11-22 00:00:00.000 +2023-10-29 00:00:00.000 2023-10-29 00:00:00.000 +2023-10-23 00:00:00.000 2023-10-23 00:00:00.000 +2023-10-22 01:00:00.000 2023-10-22 01:00:00.000 +2023-10-22 00:01:00.000 2023-10-22 00:01:00.000 +2023-10-22 00:00:01.000 2023-10-22 00:00:01.000 +2023-10-22 00:00:00.001 2023-10-22 00:00:00.001 +2023-10-22 00:00:00.000001 2023-10-22 00:00:00.000001 +2023-10-22 00:00:00.000000001 2023-10-22 00:00:00.000000001 + -- subtract +2022-10-22 00:00:00.000 2022-10-22 00:00:00.000 +2023-07-22 00:00:00.000 2023-07-22 00:00:00.000 +2023-09-22 00:00:00.000 2023-09-22 00:00:00.000 +2023-10-15 00:00:00.000 2023-10-15 00:00:00.000 +2023-10-21 00:00:00.000 2023-10-21 00:00:00.000 +2023-10-21 23:00:00.000 2023-10-21 23:00:00.000 +2023-10-21 23:59:00.000 2023-10-21 23:59:00.000 +2023-10-21 23:59:59.000 2023-10-21 23:59:59.000 +2023-10-21 23:59:59.999 2023-10-21 23:59:59.999 +2023-10-21 23:59:59.999999 2023-10-21 23:59:59.999999 +2023-10-21 23:59:59.999999999 2023-10-21 23:59:59.999999999 +-- const date, non-const delta + -- add +2024-10-22 00:00:00.000 2024-10-22 00:00:00.000 +2024-01-22 00:00:00.000 2024-01-22 00:00:00.000 +2023-11-22 00:00:00.000 2023-11-22 00:00:00.000 +2023-10-29 00:00:00.000 2023-10-29 00:00:00.000 +2023-10-23 00:00:00.000 2023-10-23 00:00:00.000 +2023-10-22 01:00:00.000 2023-10-22 01:00:00.000 +2023-10-22 00:01:00.000 2023-10-22 00:01:00.000 +2023-10-22 00:00:01.000 2023-10-22 00:00:01.000 +2023-10-22 00:00:00.001 2023-10-22 00:00:00.001 +2023-10-22 00:00:00.000001 2023-10-22 00:00:00.000001 +2023-10-22 00:00:00.000000001 2023-10-22 
00:00:00.000000001 + -- subtract +2022-10-22 00:00:00.000 2022-10-22 00:00:00.000 +2023-07-22 00:00:00.000 2023-07-22 00:00:00.000 +2023-09-22 00:00:00.000 2023-09-22 00:00:00.000 +2023-10-15 00:00:00.000 2023-10-15 00:00:00.000 +2023-10-21 00:00:00.000 2023-10-21 00:00:00.000 +2023-10-21 23:00:00.000 2023-10-21 23:00:00.000 +2023-10-21 23:59:00.000 2023-10-21 23:59:00.000 +2023-10-21 23:59:59.000 2023-10-21 23:59:59.000 +2023-10-21 23:59:59.999 2023-10-21 23:59:59.999 +2023-10-21 23:59:59.999999 2023-10-21 23:59:59.999999 +2023-10-21 23:59:59.999999999 2023-10-21 23:59:59.999999999 +-- non-const date, non-const delta + -- add +2024-10-22 00:00:00.000 2024-10-22 00:00:00.000 +2024-01-22 00:00:00.000 2024-01-22 00:00:00.000 +2023-11-22 00:00:00.000 2023-11-22 00:00:00.000 +2023-10-29 00:00:00.000 2023-10-29 00:00:00.000 +2023-10-23 00:00:00.000 2023-10-23 00:00:00.000 +2023-10-22 01:00:00.000 2023-10-22 01:00:00.000 +2023-10-22 00:01:00.000 2023-10-22 00:01:00.000 +2023-10-22 00:00:01.000 2023-10-22 00:00:01.000 +2023-10-22 00:00:00.001 2023-10-22 00:00:00.001 +2023-10-22 00:00:00.000001 2023-10-22 00:00:00.000001 +2023-10-22 00:00:00.000000001 2023-10-22 00:00:00.000000001 + -- subtract +2022-10-22 00:00:00.000 2022-10-22 00:00:00.000 +2023-07-22 00:00:00.000 2023-07-22 00:00:00.000 +2023-09-22 00:00:00.000 2023-09-22 00:00:00.000 +2023-10-15 00:00:00.000 2023-10-15 00:00:00.000 +2023-10-21 00:00:00.000 2023-10-21 00:00:00.000 +2023-10-21 23:00:00.000 2023-10-21 23:00:00.000 +2023-10-21 23:59:00.000 2023-10-21 23:59:00.000 +2023-10-21 23:59:59.000 2023-10-21 23:59:59.000 +2023-10-21 23:59:59.999 2023-10-21 23:59:59.999 +2023-10-21 23:59:59.999999 2023-10-21 23:59:59.999999 +2023-10-21 23:59:59.999999999 2023-10-21 23:59:59.999999999 diff --git a/tests/queries/0_stateless/02900_add_subtract_interval_with_string_date.sql b/tests/queries/0_stateless/02900_add_subtract_interval_with_string_date.sql new file mode 100644 index 00000000000..6a02840617b --- /dev/null +++ b/tests/queries/0_stateless/02900_add_subtract_interval_with_string_date.sql @@ -0,0 +1,111 @@ +SELECT '-- const date, const delta'; + +SELECT ' -- add'; +SELECT addYears('2023-10-22', 1), addYears('2023-10-22 00:00:00.000', 1); +SELECT addQuarters('2023-10-22', 1), addQuarters('2023-10-22 00:00:00.000', 1); +SELECT addMonths('2023-10-22', 1), addMonths('2023-10-22 00:00:00.000', 1); +SELECT addWeeks('2023-10-22', 1), addWeeks('2023-10-22 00:00:00.000', 1); +SELECT addDays('2023-10-22', 1), addDays('2023-10-22 00:00:00.000', 1); +SELECT addHours('2023-10-22', 1), addHours('2023-10-22 00:00:00.000', 1); +SELECT addMinutes('2023-10-22', 1), addMinutes('2023-10-22 00:00:00.000', 1); +SELECT addSeconds('2023-10-22', 1), addSeconds('2023-10-22 00:00:00.000', 1); +SELECT addMilliseconds('2023-10-22', 1), addMilliseconds('2023-10-22 00:00:00.000', 1); +SELECT addMicroseconds('2023-10-22', 1), addMicroseconds('2023-10-22 00:00:00.000', 1); +SELECT addNanoseconds('2023-10-22', 1), addNanoseconds('2023-10-22 00:00:00.000', 1); + +SELECT ' -- subtract'; +SELECT subtractYears('2023-10-22', 1), subtractYears('2023-10-22 00:00:00.000', 1); +SELECT subtractQuarters('2023-10-22', 1), subtractQuarters('2023-10-22 00:00:00.000', 1); +SELECT subtractMonths('2023-10-22', 1), subtractMonths('2023-10-22 00:00:00.000', 1); +SELECT subtractWeeks('2023-10-22', 1), subtractWeeks('2023-10-22 00:00:00.000', 1); +SELECT subtractDays('2023-10-22', 1), subtractDays('2023-10-22 00:00:00.000', 1); +SELECT subtractHours('2023-10-22', 1), subtractHours('2023-10-22 
00:00:00.000', 1); +SELECT subtractMinutes('2023-10-22', 1), subtractMinutes('2023-10-22 00:00:00.000', 1); +SELECT subtractSeconds('2023-10-22', 1), subtractSeconds('2023-10-22 00:00:00.000', 1); +SELECT subtractMilliseconds('2023-10-22', 1), subtractMilliseconds('2023-10-22 00:00:00.000', 1); +SELECT subtractMicroseconds('2023-10-22', 1), subtractMicroseconds('2023-10-22 00:00:00.000', 1); +SELECT subtractNanoseconds('2023-10-22', 1), subtractNanoseconds('2023-10-22 00:00:00.000', 1); + +SELECT '-- non-const date, const delta'; + +SELECT ' -- add'; +SELECT addYears(materialize('2023-10-22'), 1), addYears(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addQuarters(materialize('2023-10-22'), 1), addQuarters(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addMonths(materialize('2023-10-22'), 1), addMonths(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addWeeks(materialize('2023-10-22'), 1), addWeeks(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addDays(materialize('2023-10-22'), 1), addDays(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addHours(materialize('2023-10-22'), 1), addHours(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addMinutes(materialize('2023-10-22'), 1), addMinutes(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addSeconds(materialize('2023-10-22'), 1), addSeconds(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addMilliseconds(materialize('2023-10-22'), 1), addMilliseconds(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addMicroseconds(materialize('2023-10-22'), 1), addMicroseconds(materialize('2023-10-22 00:00:00.000'), 1); +SELECT addNanoseconds(materialize('2023-10-22'), 1), addNanoseconds(materialize('2023-10-22 00:00:00.000'), 1); + +SELECT ' -- subtract'; +SELECT subtractYears(materialize('2023-10-22'), 1), subtractYears(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractQuarters(materialize('2023-10-22'), 1), subtractQuarters(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractMonths(materialize('2023-10-22'), 1), subtractMonths(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractWeeks(materialize('2023-10-22'), 1), subtractWeeks(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractDays(materialize('2023-10-22'), 1), subtractDays(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractHours(materialize('2023-10-22'), 1), subtractHours(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractMinutes(materialize('2023-10-22'), 1), subtractMinutes(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractSeconds(materialize('2023-10-22'), 1), subtractSeconds(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractMilliseconds(materialize('2023-10-22'), 1), subtractMilliseconds(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractMicroseconds(materialize('2023-10-22'), 1), subtractMicroseconds(materialize('2023-10-22 00:00:00.000'), 1); +SELECT subtractNanoseconds(materialize('2023-10-22'), 1), subtractNanoseconds(materialize('2023-10-22 00:00:00.000'), 1); + +SELECT '-- const date, non-const delta'; + +SELECT ' -- add'; +SELECT addYears('2023-10-22', materialize(1)), addYears('2023-10-22 00:00:00.000', materialize(1)); +SELECT addQuarters('2023-10-22', materialize(1)), addQuarters('2023-10-22 00:00:00.000', materialize(1)); +SELECT addMonths('2023-10-22', materialize(1)), addMonths('2023-10-22 00:00:00.000', materialize(1)); +SELECT addWeeks('2023-10-22', materialize(1)), addWeeks('2023-10-22 00:00:00.000', materialize(1)); +SELECT addDays('2023-10-22', materialize(1)), 
addDays('2023-10-22 00:00:00.000', materialize(1)); +SELECT addHours('2023-10-22', materialize(1)), addHours('2023-10-22 00:00:00.000', materialize(1)); +SELECT addMinutes('2023-10-22', materialize(1)), addMinutes('2023-10-22 00:00:00.000', materialize(1)); +SELECT addSeconds('2023-10-22', materialize(1)), addSeconds('2023-10-22 00:00:00.000', materialize(1)); +SELECT addMilliseconds('2023-10-22', materialize(1)), addMilliseconds('2023-10-22 00:00:00.000', materialize(1)); +SELECT addMicroseconds('2023-10-22', materialize(1)), addMicroseconds('2023-10-22 00:00:00.000', materialize(1)); +SELECT addNanoseconds('2023-10-22', materialize(1)), addNanoseconds('2023-10-22 00:00:00.000', materialize(1)); + +SELECT ' -- subtract'; +SELECT subtractYears('2023-10-22', materialize(1)), subtractYears('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractQuarters('2023-10-22', materialize(1)), subtractQuarters('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractMonths('2023-10-22', materialize(1)), subtractMonths('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractWeeks('2023-10-22', materialize(1)), subtractWeeks('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractDays('2023-10-22', materialize(1)), subtractDays('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractHours('2023-10-22', materialize(1)), subtractHours('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractMinutes('2023-10-22', materialize(1)), subtractMinutes('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractSeconds('2023-10-22', materialize(1)), subtractSeconds('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractMilliseconds('2023-10-22', materialize(1)), subtractMilliseconds('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractMicroseconds('2023-10-22', materialize(1)), subtractMicroseconds('2023-10-22 00:00:00.000', materialize(1)); +SELECT subtractNanoseconds('2023-10-22', materialize(1)), subtractNanoseconds('2023-10-22 00:00:00.000', materialize(1)); + +SELECT '-- non-const date, non-const delta'; + +SELECT ' -- add'; +SELECT addYears(materialize('2023-10-22'), materialize(1)), addYears(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addQuarters(materialize('2023-10-22'), materialize(1)), addQuarters(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addMonths(materialize('2023-10-22'), materialize(1)), addMonths(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addWeeks(materialize('2023-10-22'), materialize(1)), addWeeks(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addDays(materialize('2023-10-22'), materialize(1)), addDays(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addHours(materialize('2023-10-22'), materialize(1)), addHours(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addMinutes(materialize('2023-10-22'), materialize(1)), addMinutes(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addSeconds(materialize('2023-10-22'), materialize(1)), addSeconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addMilliseconds(materialize('2023-10-22'), materialize(1)), addMilliseconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addMicroseconds(materialize('2023-10-22'), materialize(1)), addMicroseconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT addNanoseconds(materialize('2023-10-22'), materialize(1)), addNanoseconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); + +SELECT ' -- subtract'; 
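+-- materialize() turns a constant argument into a full (non-constant) column, so the four sections of
+-- this test cover every const/non-const combination of the date string and the delta; the expected
+-- results are identical in all four sections.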
+SELECT subtractYears(materialize('2023-10-22'), materialize(1)), subtractYears(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractQuarters(materialize('2023-10-22'), materialize(1)), subtractQuarters(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractMonths(materialize('2023-10-22'), materialize(1)), subtractMonths(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractWeeks(materialize('2023-10-22'), materialize(1)), subtractWeeks(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractDays(materialize('2023-10-22'), materialize(1)), subtractDays(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractHours(materialize('2023-10-22'), materialize(1)), subtractHours(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractMinutes(materialize('2023-10-22'), materialize(1)), subtractMinutes(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractSeconds(materialize('2023-10-22'), materialize(1)), subtractSeconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractMilliseconds(materialize('2023-10-22'), materialize(1)), subtractMilliseconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractMicroseconds(materialize('2023-10-22'), materialize(1)), subtractMicroseconds(materialize('2023-10-22 00:00:00.000'), materialize(1)); +SELECT subtractNanoseconds(materialize('2023-10-22'), materialize(1)), subtractNanoseconds(materialize('2023-10-22 00:00:00.000'), 1); diff --git a/tests/queries/0_stateless/02900_decimal_sort_with_multiple_columns.reference b/tests/queries/0_stateless/02900_decimal_sort_with_multiple_columns.reference new file mode 100644 index 00000000000..f99785d2b99 --- /dev/null +++ b/tests/queries/0_stateless/02900_decimal_sort_with_multiple_columns.reference @@ -0,0 +1,600 @@ +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 1 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 3 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 5 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 7 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 9 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 11 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 13 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 15 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 
+-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 17 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +-1 19 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 2 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 4 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 6 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 8 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 10 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 12 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 14 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 16 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 +0 18 diff --git a/tests/queries/0_stateless/02900_decimal_sort_with_multiple_columns.sql b/tests/queries/0_stateless/02900_decimal_sort_with_multiple_columns.sql new file mode 100644 index 00000000000..bc74add253b --- /dev/null +++ b/tests/queries/0_stateless/02900_decimal_sort_with_multiple_columns.sql @@ -0,0 +1 @@ +select -number % 2 as i, toDecimal32(number % 20, 3) as j from numbers(600) order by i, j; diff --git a/tests/queries/0_stateless/02900_matview_create_to_errors.reference b/tests/queries/0_stateless/02900_matview_create_to_errors.reference new file mode 100644 index 00000000000..d516bed81ab --- /dev/null +++ b/tests/queries/0_stateless/02900_matview_create_to_errors.reference @@ -0,0 +1,4 @@ +Code: 62. DB::Ex---tion: When creating a materialized view you can't declare both 'TO [db].[table]' and 'EMPTY'. (SYNTAX_ERROR) (version reference) +Code: 62. DB::Ex---tion: When creating a materialized view you can't declare both 'TO [db].[table]' and 'POPULATE'. (SYNTAX_ERROR) (version reference) +Code: 62. DB::Ex---tion: When creating a materialized view you can't declare both 'TO [db].[table]' and 'ENGINE'. (SYNTAX_ERROR) (version reference) +Code: 62. DB::Ex---tion: When creating a materialized view you can't declare both 'ENGINE' and 'TO [db].[table]'. 
(SYNTAX_ERROR) (version reference) diff --git a/tests/queries/0_stateless/02900_matview_create_to_errors.sh b/tests/queries/0_stateless/02900_matview_create_to_errors.sh new file mode 100755 index 00000000000..a709bd2f7a1 --- /dev/null +++ b/tests/queries/0_stateless/02900_matview_create_to_errors.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'create materialized view aaaa TO b EMPTY as Select * from a;' | sed -e 's/Exception/Ex---tion/ ; s/version .*/version reference)/g' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'create materialized view aaaa TO b POPULATE as Select * from a;' | sed -e 's/Exception/Ex---tion/ ; s/version .*/version reference)/g' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'create materialized view aaaa TO b ENGINE = MergeTree() as Select * from a;' | sed -e 's/Exception/Ex---tion/ ; s/version .*/version reference)/g' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'create materialized view aaaa ENGINE = MergeTree() TO b as Select * from a;' | sed -e 's/Exception/Ex---tion/ ; s/version .*/version reference)/g' diff --git a/tests/queries/0_stateless/02900_window_function_with_sparse_column.reference b/tests/queries/0_stateless/02900_window_function_with_sparse_column.reference new file mode 100644 index 00000000000..694d752d6a8 --- /dev/null +++ b/tests/queries/0_stateless/02900_window_function_with_sparse_column.reference @@ -0,0 +1,5 @@ +false +false +false + + diff --git a/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql b/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql new file mode 100644 index 00000000000..0b053d3e870 --- /dev/null +++ b/tests/queries/0_stateless/02900_window_function_with_sparse_column.sql @@ -0,0 +1,45 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55843 +-- These tests pass without the fix when either of +-- - optimize_read_in_window_order = 0 and optimize_read_in_order = 0 +-- - ratio_of_defaults_for_sparse_serialization = 1 +-- However it is better to leave the settings as randomized because we run +-- stateless tests quite a few times during a PR, so if a bug is introduced +-- then there is a big chance of catching it. Furthermore, randomized settings +-- might identify new bugs. 
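+-- ratio_of_defaults_for_sparse_serialization = 1 effectively disables sparse serialization (a column
+-- would have to consist entirely of default values to be stored sparsely), which is presumably why the
+-- bug does not reproduce with that value.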
+ +CREATE TABLE test1 +( + id String, + time DateTime64(9), + key Int64, + value Bool, +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(time) +ORDER BY (key, id, time); + +INSERT INTO test1 VALUES ('id0', now(), 3, false) + +SELECT last_value(value) OVER (PARTITION BY id ORDER BY time ASC) as last_value +FROM test1 +WHERE (key = 3); + +SELECT last_value(value) OVER (ORDER BY time ASC) as last_value +FROM test1 +WHERE (key = 3); + +SELECT last_value(value) OVER (PARTITION BY id ORDER BY time ASC) as last_value +FROM test1; + + + +CREATE TABLE test2 +( + time DateTime, + value String +) +ENGINE = MergeTree +ORDER BY (time) AS SELECT 0, ''; + +SELECT any(value) OVER (ORDER BY time ASC) FROM test2; +SELECT last_value(value) OVER (ORDER BY time ASC) FROM test2; diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference b/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference new file mode 100644 index 00000000000..29004fde9d4 --- /dev/null +++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference @@ -0,0 +1,15 @@ +1 +02901_parallel_replicas_rollup-default Used parallel replicas: true +0 0 0 6 +2019 0 0 2 +2019 1 0 2 +2019 1 5 1 +2019 1 15 1 +2020 0 0 4 +2020 1 0 2 +2020 1 5 1 +2020 1 15 1 +2020 10 0 2 +2020 10 5 1 +2020 10 15 1 +02901_parallel_replicas_rollup2-default Used parallel replicas: true diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh new file mode 100755 index 00000000000..f23b80348c1 --- /dev/null +++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function were_parallel_replicas_used () +{ + $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + + # Not using current_database = '$CLICKHOUSE_DATABASE' as nested parallel queries aren't run with it + $CLICKHOUSE_CLIENT --query " + SELECT + initial_query_id, + concat('Used parallel replicas: ', (countIf(initial_query_id != query_id) != 0)::bool::String) as used + FROM system.query_log + WHERE event_date >= yesterday() + AND initial_query_id = '$1' + GROUP BY initial_query_id + ORDER BY min(event_time_microseconds) ASC + FORMAT TSV" +} + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS nested" +$CLICKHOUSE_CLIENT --query "CREATE TABLE nested (x UInt8) ENGINE = MergeTree ORDER BY () AS Select 1"; + +query_id="02901_parallel_replicas_rollup-$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT \ + --query_id "${query_id}" \ + --max_parallel_replicas 3 \ + --prefer_localhost_replica 1 \ + --use_hedged_requests 0 \ + --cluster_for_parallel_replicas "parallel_replicas" \ + --allow_experimental_parallel_reading_from_replicas 1 \ + --parallel_replicas_for_non_replicated_merge_tree 1 \ + --parallel_replicas_min_number_of_rows_per_replica 0 \ + --query " + SELECT 1 FROM nested + GROUP BY 1 WITH ROLLUP + ORDER BY max((SELECT 1 WHERE 0)); +"; +were_parallel_replicas_used $query_id +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS nested" + + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS days" +$CLICKHOUSE_CLIENT --query " + CREATE TABLE days + ( + year Int64, + month Int64, + day Int64 + ) + ENGINE = MergeTree() + ORDER BY year"; +$CLICKHOUSE_CLIENT --query " + INSERT INTO days VALUES (2019, 1, 5), (2019, 1, 15), (2020, 1, 5), (2020, 1, 15), (2020, 10, 5), (2020, 10, 15); +"; + +# Note that we enforce ordering of the final output because it's not 
guaranteed by GROUP BY ROLLUP, only the values of count() are +query_id="02901_parallel_replicas_rollup2-$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT \ + --query_id "${query_id}" \ + --max_parallel_replicas 3 \ + --prefer_localhost_replica 1 \ + --use_hedged_requests 0 \ + --cluster_for_parallel_replicas "parallel_replicas" \ + --allow_experimental_parallel_reading_from_replicas 1 \ + --parallel_replicas_for_non_replicated_merge_tree 1 \ + --parallel_replicas_min_number_of_rows_per_replica 0 \ + --query "SELECT * FROM (SELECT year, month, day, count(*) FROM days GROUP BY year, month, day WITH ROLLUP) ORDER BY 1, 2, 3"; + +were_parallel_replicas_used $query_id +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS days" diff --git a/tests/queries/0_stateless/02901_predicate_pushdown_cte_stateful.reference b/tests/queries/0_stateless/02901_predicate_pushdown_cte_stateful.reference new file mode 100644 index 00000000000..2e9c1e130da --- /dev/null +++ b/tests/queries/0_stateless/02901_predicate_pushdown_cte_stateful.reference @@ -0,0 +1 @@ +558 diff --git a/tests/queries/0_stateless/02901_predicate_pushdown_cte_stateful.sql b/tests/queries/0_stateless/02901_predicate_pushdown_cte_stateful.sql new file mode 100644 index 00000000000..f63e1d59471 --- /dev/null +++ b/tests/queries/0_stateless/02901_predicate_pushdown_cte_stateful.sql @@ -0,0 +1,20 @@ +CREATE TABLE t +( + `rDate` String, + `cpu_total` Int64 +) +ENGINE = Log; + +insert into t values ('2022-03-06', 22442 ), ('2022-03-05', 22382 ), ('2022-03-04', 22395 ), ('2022-03-03', 22306 ), ('2022-03-02', 22095 ), ('2022-03-01', 22065 ), ('2022-02-28', 21949 ), ('2022-02-27', 21884 ), ('2022-02-26', 21875 ), ('2022-02-25', 21858 ), ('2022-02-24', 21775 ), ('2022-02-23', 21639 ), ('2022-02-22', 21557 ), ('2022-02-21', 21381 ), ('2022-02-20', 21794 ), ('2022-02-19', 21808 ), ('2022-02-18', 21695 ), ('2022-02-17', 20874 ), ('2022-02-16', 20911 ), ('2022-02-15', 20898 ), ('2022-02-14', 20768 ), ('2022-02-13', 20588 ), ('2022-02-12', 20516 ), ('2022-02-11', 20501 ), ('2022-02-10', 20429 ), ('2022-02-09', 20208 ), ('2022-02-08', 20186 ), ('2022-02-07', 20192 ), ('2022-02-06', 20192 ), ('2022-02-05', 20175 ), ('2022-02-04', 20191 ), ('2022-02-03', 20214 ), ('2022-02-02', 20215 ), ('2022-02-01', 20220 ), ('2022-01-31', 20146 ), ('2022-01-30', 20137 ), ('2022-01-29', 20162 ), ('2022-01-28', 20164 ), ('2022-01-27', 20128 ), ('2022-01-26', 20139 ), ('2022-01-25', 20000 ), ('2022-01-24', 19778 ), ('2022-01-23', 19789 ), ('2022-01-22', 19628 ), ('2022-01-21', 19631 ), ('2022-01-20', 19386 ), ('2022-01-19', 19439 ), ('2022-01-18', 19477 ), ('2022-01-17', 19386 ), ('2022-01-16', 20013 ), ('2022-01-15', 19359 ), ('2022-01-14', 19356 ), ('2022-01-13', 19300 ), ('2022-01-12', 19237 ), ('2022-01-11', 19159 ), ('2022-01-10', 18970 ), ('2022-01-09', 18804 ), ('2022-01-08', 18816 ), ('2022-01-07', 18808 ), ('2022-01-06', 18693 ), ('2022-01-05', 18639 ), ('2022-01-04', 18579 ), ('2022-01-03', 18450 ), ('2022-01-02', 18458 ), ('2022-01-01', 18445 ), ('2021-12-31', 18443 ), ('2021-12-30', 18388 ), ('2021-12-29', 18348 ), ('2021-12-28', 18042 ), ('2021-12-26', 18049 ), ('2021-12-22', 17962 ); + +SELECT cpu_total_week +FROM + ( + WITH neighbor(cpu_total, 7) AS cpu_total_7 + SELECT + rDate, + floor(multiIf(cpu_total_7 = 0, 0, cpu_total - cpu_total_7), 2) AS cpu_total_week + FROM t + ) AS t_table_471873 +WHERE (rDate >= '2022-03-06') AND (rDate <= '2022-03-06') +SETTINGS enable_optimize_predicate_expression = 1; diff --git 
a/tests/queries/0_stateless/02902_diable_apply_deleted_mask.reference b/tests/queries/0_stateless/02902_diable_apply_deleted_mask.reference new file mode 100644 index 00000000000..9563c41e550 --- /dev/null +++ b/tests/queries/0_stateless/02902_diable_apply_deleted_mask.reference @@ -0,0 +1,16 @@ +Normal SELECT does not see deleted rows +1 1 1 +3 3 1 +With the setting disabled the deleted rows are visible +0 0 0 +1 1 1 +2 2 0 +3 3 1 +4 4 0 +With the setting disabled the deleted rows are visible but still can be filtered out +1 1 +3 3 +Read the data after OPTIMIZE, all deleted rows should be physically removed now +1 1 1 +3 3 1 +5 5 1 diff --git a/tests/queries/0_stateless/02902_diable_apply_deleted_mask.sql b/tests/queries/0_stateless/02902_diable_apply_deleted_mask.sql new file mode 100644 index 00000000000..81d5714f00b --- /dev/null +++ b/tests/queries/0_stateless/02902_diable_apply_deleted_mask.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS test_apply_deleted_mask; + +CREATE TABLE test_apply_deleted_mask(id Int64, value String) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_apply_deleted_mask SELECT number, number::String FROM numbers(5); + +DELETE FROM test_apply_deleted_mask WHERE id % 2 = 0; + +SELECT 'Normal SELECT does not see deleted rows'; +SELECT *, _row_exists FROM test_apply_deleted_mask; + +SELECT 'With the setting disabled the deleted rows are visible'; +SELECT *, _row_exists FROM test_apply_deleted_mask SETTINGS apply_deleted_mask = 0; + +SELECT 'With the setting disabled the deleted rows are visible but still can be filtered out'; +SELECT * FROM test_apply_deleted_mask WHERE _row_exists SETTINGS apply_deleted_mask = 0; + +INSERT INTO test_apply_deleted_mask SELECT number, number::String FROM numbers(5, 1); + +OPTIMIZE TABLE test_apply_deleted_mask FINAL SETTINGS mutations_sync=2; + +SELECT 'Read the data after OPTIMIZE, all deleted rows should be physically removed now'; +SELECT *, _row_exists FROM test_apply_deleted_mask SETTINGS apply_deleted_mask = 0; + +DROP TABLE test_apply_deleted_mask; \ No newline at end of file diff --git a/tests/queries/0_stateless/02902_json_skip_null_values.reference b/tests/queries/0_stateless/02902_json_skip_null_values.reference new file mode 100644 index 00000000000..776b1983416 --- /dev/null +++ b/tests/queries/0_stateless/02902_json_skip_null_values.reference @@ -0,0 +1,12 @@ +{"c":{"a":"0","c":{"x":"0"}}} +{"c":{"a":"1","c":{"x":"1"}}} +{"c":{"a":"2","c":{"x":"2"}}} +{"c":{"a":"0","b":null,"c":{"x":"0","y":null}}} +{"c":{"a":"1","b":null,"c":{"x":"1","y":null}}} +{"c":{"a":"2","b":null,"c":{"x":"2","y":null}}} +{"a":"0","b":null,"c":{"x":"0","y":null}} +{"a":"1","b":null,"c":{"x":"1","y":null}} +{"a":"2","b":null,"c":{"x":"2","y":null}} +{"a":"0","c":{"x":"0"}} +{"a":"1","c":{"x":"1"}} +{"a":"2","c":{"x":"2"}} diff --git a/tests/queries/0_stateless/02902_json_skip_null_values.sql b/tests/queries/0_stateless/02902_json_skip_null_values.sql new file mode 100644 index 00000000000..24b162216bb --- /dev/null +++ b/tests/queries/0_stateless/02902_json_skip_null_values.sql @@ -0,0 +1,39 @@ +-- Tags: no-fasttest + +create table test_02902 engine File(JSONEachRow) + settings output_format_json_named_tuples_as_objects = 1, output_format_json_skip_null_value_in_named_tuples = 1 + as select cast((number::String, null, (number::String, null)), 'Tuple(a Nullable(String), b Nullable(Int64), c Tuple(x Nullable(String), y Nullable(Float64)))') as c + from numbers(3); + +select * from test_02902 format JSONEachRow settings
output_format_json_named_tuples_as_objects = 1, output_format_json_skip_null_value_in_named_tuples = 1; +select * from test_02902 format JSONEachRow settings output_format_json_named_tuples_as_objects = 1, output_format_json_skip_null_value_in_named_tuples = 0; + +drop table test_02902; + +select + toJSONString(c) +from + ( + select + cast( + (number:: String, null, (number:: String, null)), + 'Tuple(a Nullable(String), b Nullable(Int64), c Tuple(x Nullable(String), y Nullable(Float64)))' + ) as c + from + numbers(3) + ) +settings output_format_json_named_tuples_as_objects = 1, output_format_json_skip_null_value_in_named_tuples = 0; + +select + toJSONString(c) +from + ( + select + cast( + (number:: String, null, (number:: String, null)), + 'Tuple(a Nullable(String), b Nullable(Int64), c Tuple(x Nullable(String), y Nullable(Float64)))' + ) as c + from + numbers(3) + ) +settings output_format_json_named_tuples_as_objects = 1, output_format_json_skip_null_value_in_named_tuples = 1; diff --git a/tests/queries/0_stateless/02902_select_subcolumns_from_engine_null.reference b/tests/queries/0_stateless/02902_select_subcolumns_from_engine_null.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02902_select_subcolumns_from_engine_null.sql b/tests/queries/0_stateless/02902_select_subcolumns_from_engine_null.sql new file mode 100644 index 00000000000..4cd6e68fdaf --- /dev/null +++ b/tests/queries/0_stateless/02902_select_subcolumns_from_engine_null.sql @@ -0,0 +1,6 @@ +CREATE TABLE null_02902 (t Tuple(num Int64, str String)) ENGINE = Null; +SELECT t FROM null_02902; +SELECT tupleElement(t, 'num') FROM null_02902; +SELECT t.num, t.str FROM null_02902; + +DROP TABLE null_02902; diff --git a/tests/queries/0_stateless/02902_show_databases_limit.reference b/tests/queries/0_stateless/02902_show_databases_limit.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02902_show_databases_limit.sql b/tests/queries/0_stateless/02902_show_databases_limit.sql new file mode 100644 index 00000000000..e13ae5a7e34 --- /dev/null +++ b/tests/queries/0_stateless/02902_show_databases_limit.sql @@ -0,0 +1 @@ +SHOW DATABASES LIMIT 0; diff --git a/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.reference b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql new file mode 100644 index 00000000000..3228810e0ba --- /dev/null +++ b/tests/queries/0_stateless/02902_topKGeneric_deserialization_memory.sql @@ -0,0 +1,9 @@ +-- Tags: no-fasttest + +-- https://github.com/ClickHouse/ClickHouse/issues/49706 +-- Using format Parquet for convenience so it errors out without output (but still deserializes the output) +-- Without the fix this would OOM the client when deserializing the state +SELECT + topKResampleState(1048576, 257, 65536, 10)(toString(number), number) +FROM numbers(3) +FORMAT Parquet; -- { clientError UNKNOWN_TYPE } diff --git a/tests/queries/0_stateless/02903_bug_43644.reference b/tests/queries/0_stateless/02903_bug_43644.reference new file mode 100644 index 00000000000..2993076a861 --- /dev/null +++ b/tests/queries/0_stateless/02903_bug_43644.reference @@ -0,0 +1 @@ +2022-11-24 12:00:00 diff --git a/tests/queries/0_stateless/02903_bug_43644.sql 
b/tests/queries/0_stateless/02903_bug_43644.sql new file mode 100644 index 00000000000..c86988f8346 --- /dev/null +++ b/tests/queries/0_stateless/02903_bug_43644.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + `machine_id` UInt64, + `name` String, + `timestamp` DateTime +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(timestamp) +ORDER BY machine_id; + +insert into tab(machine_id, name, timestamp) +select 1, 'a_name', '2022-11-24 12:00:00'; + +SELECT + toStartOfInterval(timestamp, INTERVAL 300 SECOND) AS ts +FROM tab +WHERE ts > '2022-11-24 11:19:00' +GROUP BY ts; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02903_empty_order_by_throws_error.reference b/tests/queries/0_stateless/02903_empty_order_by_throws_error.reference new file mode 100644 index 00000000000..2c94e483710 --- /dev/null +++ b/tests/queries/0_stateless/02903_empty_order_by_throws_error.reference @@ -0,0 +1,2 @@ +OK +OK diff --git a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh new file mode 100755 index 00000000000..64f5dd1a987 --- /dev/null +++ b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# setting disabled and no order by or primary key; expect error +$CLICKHOUSE_CLIENT -n --query=" + DROP TABLE IF EXISTS test_empty_order_by; + CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; +" 2>&1 \ | grep -F -q "You must provide an ORDER BY or PRIMARY KEY expression in the table definition." && echo 'OK' || echo 'FAIL' + +# setting disabled and primary key in table definition +$CLICKHOUSE_CLIENT -n --query=" + DROP TABLE IF EXISTS test_empty_order_by; + CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() PRIMARY KEY a SETTINGS index_granularity = 8192; + SHOW CREATE TABLE test_empty_order_by; +" 2>&1 \ | grep -F -q "ORDER BY a" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.reference b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.reference new file mode 100644 index 00000000000..6f9488be24d --- /dev/null +++ b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.reference @@ -0,0 +1 @@ +Information 1 diff --git a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh new file mode 100755 index 00000000000..074a3a6725e --- /dev/null +++ b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# Tags: no-ordinary-database +# Tag no-ordinary-database: requires UUID + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Test that retriable errors during merges/mutations +# (i.e. "No active replica has part X or covering part") +# does not appears as errors (level=Error), only as info message (level=Information). 
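+# The setup below is meant to provoke exactly that situation: rmt1 is created with
+# always_fetch_merged_part=1, so after OPTIMIZE it has to download the merged part from another replica,
+# while replication-log pulling is stopped on rmt2, so no replica actually produces the merged part and
+# rmt1's fetch attempts keep failing with the retriable "No active replica has part ... or covering part"
+# message that the test then looks for in system.text_log.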
+ +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists rmt1; + drop table if exists rmt2; + + create table rmt1 (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', '1') order by key settings always_fetch_merged_part=1; + create table rmt2 (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', '2') order by key settings always_fetch_merged_part=0; + + insert into rmt1 values (1); + insert into rmt1 values (2); + + system sync replica rmt1; + system stop pulling replication log rmt2; + optimize table rmt1 final settings alter_sync=0, optimize_throw_if_noop=1; +" || exit 1 + +table_uuid=$($CLICKHOUSE_CLIENT -q "select uuid from system.tables where database = currentDatabase() and table = 'rmt1'") +if [[ -z $table_uuid ]]; then + echo "Table does not have UUID" >&2 + exit 1 +fi + +# NOTE: that part name can be different from all_0_1_1, in case of ZooKeeper retries +part_name='%' + +# wait while there be at least one 'No active replica has part all_0_1_1 or covering part' in logs +for _ in {0..50}; do + no_active_repilica_messages=$($CLICKHOUSE_CLIENT -nm -q " + system flush logs; + + select count() + from system.text_log + where + event_date >= yesterday() and event_time >= now() - 600 and + ( + (logger_name = 'MergeTreeBackgroundExecutor' and message like '%{$table_uuid::$part_name}%No active replica has part $part_name or covering part%') or + (logger_name like '$table_uuid::$part_name (MergeFromLogEntryTask)' and message like '%No active replica has part $part_name or covering part%') + ); + ") + if [[ $no_active_repilica_messages -gt 0 ]]; then + break + fi + # too frequent "system flush logs" causes troubles + sleep 1 +done + +$CLICKHOUSE_CLIENT -nm -q " + system start pulling replication log rmt2; + system flush logs; + + select + level, count() > 0 + from system.text_log + where + event_date >= yesterday() and event_time >= now() - 600 and + ( + (logger_name = 'MergeTreeBackgroundExecutor' and message like '%{$table_uuid::$part_name}%No active replica has part $part_name or covering part%') or + (logger_name like '$table_uuid::$part_name (MergeFromLogEntryTask)' and message like '%No active replica has part $part_name or covering part%') + ) + group by level; +" diff --git a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.reference b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.reference new file mode 100644 index 00000000000..b462a5a7baa --- /dev/null +++ b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.reference @@ -0,0 +1,4 @@ +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh new file mode 100755 index 00000000000..7ac9b488be5 --- /dev/null +++ b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# setting enabled and no order by or primary key +${CLICKHOUSE_CLIENT} -n --query=" + SET create_table_empty_primary_key_by_default = true; + DROP TABLE IF EXISTS test_empty_order_by; + CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; + SHOW CREATE TABLE test_empty_order_by; +" 2>&1 \ | grep -F -q "ORDER BY tuple()" && echo 'OK' || echo 'FAIL' + +# setting enabled and per-column primary key +${CLICKHOUSE_CLIENT} -n --query=" + SET create_table_empty_primary_key_by_default = true; + DROP TABLE IF EXISTS test_empty_order_by; + CREATE TABLE test_empty_order_by(a UInt8 PRIMARY KEY, b String PRIMARY KEY) ENGINE = MergeTree() SETTINGS index_granularity = 8192; + SHOW CREATE TABLE test_empty_order_by; +" 2>&1 \ | grep -F -q "ORDER BY (a, b)" && echo 'OK' || echo 'FAIL' + +# setting enabled and primary key in table definition (not per-column or order by) +${CLICKHOUSE_CLIENT} -n --query=" + SET create_table_empty_primary_key_by_default = true; + DROP TABLE IF EXISTS test_empty_order_by; + CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() PRIMARY KEY (a) SETTINGS index_granularity = 8192; + SHOW CREATE TABLE test_empty_order_by; +" 2>&1 \ | grep -F -q "ORDER BY a" && echo 'OK' || echo 'FAIL' + +# setting enabled and order by in table definition (no primary key) +${CLICKHOUSE_CLIENT} -n --query=" + SET create_table_empty_primary_key_by_default = true; + DROP TABLE IF EXISTS test_empty_order_by; + CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() ORDER BY (a, b) SETTINGS index_granularity = 8192; + SHOW CREATE TABLE test_empty_order_by; +" 2>&1 \ | grep -F -q "ORDER BY (a, b)" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02905_show_setting_query.reference b/tests/queries/0_stateless/02905_show_setting_query.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/02905_show_setting_query.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/02905_show_setting_query.sql b/tests/queries/0_stateless/02905_show_setting_query.sql new file mode 100644 index 00000000000..bbbb1a7e237 --- /dev/null +++ b/tests/queries/0_stateless/02905_show_setting_query.sql @@ -0,0 +1,7 @@ +SET max_threads = 1; +SHOW SETTING max_threads; + +SET max_threads = 2; +SHOW SETTING max_threads; + +SHOW SETTING `max_threads' OR name = 'max_memory_usage`; diff --git a/tests/queries/0_stateless/helpers/tcp_client.py b/tests/queries/0_stateless/helpers/tcp_client.py deleted file mode 100644 index fdc4ab28e04..00000000000 --- a/tests/queries/0_stateless/helpers/tcp_client.py +++ /dev/null @@ -1,313 +0,0 @@ -import socket -import os -import uuid -import struct - -CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") -CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) -CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") -CLIENT_NAME = "simple native protocol" - - -def writeVarUInt(x, ba): - for _ in range(0, 9): - byte = x & 0x7F - if x > 0x7F: - byte |= 0x80 - - ba.append(byte) - - x >>= 7 - if x == 0: - return - - -def writeStringBinary(s, ba): - b = bytes(s, "utf-8") - writeVarUInt(len(s), ba) - ba.extend(b) - - -def serializeClientInfo(ba, query_id): - writeStringBinary("default", ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary("127.0.0.1:9000", ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - 
ba.append(1) # TCP - writeStringBinary("os_user", ba) # os_user - writeStringBinary("client_hostname", ba) # client_hostname - writeStringBinary(CLIENT_NAME, ba) # client_name - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary("", ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry - - -def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num - - -def assertPacket(packet, expected): - assert packet == expected, "Got: {}, expected: {}".format(packet, expected) - - -class Data(object): - def __init__(self, key, value): - self.key = key - self.value = value - - -class TCPClient(object): - def __init__(self, timeout=30): - self.timeout = timeout - self.socket = None - - def __enter__(self): - self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.socket.settimeout(self.timeout) - self.socket.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) - - self.sendHello() - self.receiveHello() - - return self - - def __exit__(self, exc_type, exc_value, traceback): - if self.socket: - self.socket.close() - - def readStrict(self, size=1): - res = bytearray() - while size: - cur = self.socket.recv(size) - # if not res: - # raise "Socket is closed" - size -= len(cur) - res.extend(cur) - - return res - - def readUInt(self, size=1): - res = self.readStrict(size) - val = 0 - for i in range(len(res)): - val += res[i] << (i * 8) - return val - - def readUInt8(self): - return self.readUInt() - - def readUInt16(self): - return self.readUInt(2) - - def readUInt32(self): - return self.readUInt(4) - - def readUInt64(self): - return self.readUInt(8) - - def readFloat16(self): - return struct.unpack("e", self.readStrict(2)) - - def readFloat32(self): - return struct.unpack("f", self.readStrict(4)) - - def readFloat64(self): - return struct.unpack("d", self.readStrict(8)) - - def readVarUInt(self): - x = 0 - for i in range(9): - byte = self.readStrict()[0] - x |= (byte & 0x7F) << (7 * i) - - if not byte & 0x80: - return x - - return x - - def readStringBinary(self): - size = self.readVarUInt() - s = self.readStrict(size) - return s.decode("utf-8") - - def send(self, byte_array): - self.socket.sendall(byte_array) - - def sendHello(self): - ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary(CLIENT_NAME, ba) - writeVarUInt(21, ba) - writeVarUInt(9, ba) - writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary("default", ba) # user - writeStringBinary("", ba) # pwd - self.send(ba) - - def receiveHello(self): - p_type = self.readVarUInt() - assert p_type == 0 # Hello - _server_name = self.readStringBinary() - _server_version_major = self.readVarUInt() - _server_version_minor = self.readVarUInt() - _server_revision = self.readVarUInt() - _server_timezone = self.readStringBinary() - _server_display_name = self.readStringBinary() - _server_version_patch = self.readVarUInt() - - def sendQuery(self, query, settings=None): - if settings == None: - settings = {} # No settings - - ba = bytearray() - query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query - writeStringBinary(query_id, ba) - - ba.append(1) # INITIAL_QUERY - - # client info - serializeClientInfo(ba, query_id) - - # Settings - for key, value in settings.items(): - writeStringBinary(key, ba) - writeVarUInt(1, ba) # is_important - writeStringBinary(str(value), ba) - 
writeStringBinary("", ba) # End of settings - - writeStringBinary("", ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally - self.send(ba) - - def sendEmptyBlock(self): - ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary("", ba) - serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns - self.send(ba) - - def readException(self): - code = self.readUInt32() - _name = self.readStringBinary() - text = self.readStringBinary() - self.readStringBinary() # trace - assertPacket(self.readUInt8(), 0) # has_nested - return "code {}: {}".format(code, text.replace("DB::Exception:", "")) - - def readPacketType(self): - packet_type = self.readVarUInt() - if packet_type == 2: # Exception - raise RuntimeError(self.readException()) - - return packet_type - - def readResponse(self): - packet_type = self.readPacketType() - if packet_type == 1: # Data - return None - if packet_type == 3: # Progress - return None - if packet_type == 5: # End stream - return None - - raise RuntimeError("Unexpected packet: {}".format(packet_type)) - - def readProgressData(self): - read_rows = self.readVarUInt() - read_bytes = self.readVarUInt() - total_rows_to_read = self.readVarUInt() - written_rows = self.readVarUInt() - written_bytes = self.readVarUInt() - - return read_rows, read_bytes, total_rows_to_read, written_rows, written_bytes - - def readProgress(self): - packet_type = self.readPacketType() - if packet_type == 5: # End stream - return None - assertPacket(packet_type, 3) # Progress - return self.readProgressData() - - def readHeaderInfo(self): - self.readStringBinary() # external table name - # BlockInfo - assertPacket(self.readVarUInt(), 1) # field number 1 - assertPacket(self.readUInt8(), 0) # is_overflows - assertPacket(self.readVarUInt(), 2) # field number 2 - assertPacket(self.readUInt32(), 4294967295) # bucket_num - assertPacket(self.readVarUInt(), 0) # 0 - columns = self.readVarUInt() # rows - rows = self.readVarUInt() # columns - - return columns, rows - - def readHeader(self): - packet_type = self.readPacketType() - assertPacket(packet_type, 1) # Data - - columns, rows = self.readHeaderInfo() - print("Rows {} Columns {}".format(rows, columns)) - for _ in range(columns): - col_name = self.readStringBinary() - type_name = self.readStringBinary() - print("Column {} type {}".format(col_name, type_name)) - - def readRow(self, row_type, rows): - supported_row_types = { - "UInt8": self.readUInt8, - "UInt16": self.readUInt16, - "UInt32": self.readUInt32, - "UInt64": self.readUInt64, - "Float16": self.readFloat16, - "Float32": self.readFloat32, - "Float64": self.readFloat64, - } - if row_type in supported_row_types: - read_type = supported_row_types[row_type] - row = [read_type() for _ in range(rows)] - return row - else: - raise RuntimeError( - "Current python version of tcp client doesn't support the following type of row: {}".format( - row_type - ) - ) - - def readDataWithoutProgress(self, need_print_info=True): - packet_type = self.readPacketType() - while packet_type == 3: # Progress - self.readProgressData() - packet_type = self.readPacketType() - - if packet_type == 5: # End stream - return None - assertPacket(packet_type, 1) # Data - - columns, rows = self.readHeaderInfo() - data = [] - if need_print_info: - print("Rows {} Columns {}".format(rows, columns)) - - for _ in range(columns): - col_name = self.readStringBinary() - type_name = self.readStringBinary() - if 
need_print_info: - print("Column {} type {}".format(col_name, type_name)) - - data.append(Data(col_name, self.readRow(type_name, rows))) - - return data diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f104c13eab7..4cc81ff460c 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -993,8 +993,11 @@ activerecord addDate addDays addHours +addMicroseconds +addMilliseconds addMinutes addMonths +addNanoseconds addQuarters addSeconds addWeeks @@ -1456,6 +1459,7 @@ farmFingerprint farmHash fastops fcoverage +fibonacci fifo filesystem filesystemAvailable @@ -1682,6 +1686,7 @@ jsoncompactstringseachrowwithnames jsoncompactstringseachrowwithnamesandtypes jsoneachrow jsoneachrowwithprogress +jsonMergePatch jsonobjecteachrow jsonstrings jsonstringseachrow @@ -2315,8 +2320,11 @@ substrings subtitiles subtractDays subtractHours +subtractMicroseconds +subtractMilliseconds subtractMinutes subtractMonths +subtractNanoseconds subtractQuarters subtractSeconds subtractWeeks
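For reference, the framing primitive the deleted tests/queries/0_stateless/helpers/tcp_client.py was built on is the native protocol's VarUInt: an unsigned integer written 7 bits at a time, least-significant group first, with the high bit of every byte acting as a continuation flag and a hard cap of 9 bytes (so values up to 2^63 - 1). The standalone Python sketch below round-trips that encoding; it mirrors the writeVarUInt/readVarUInt helpers removed above, but uses io.BytesIO in place of the TCP socket, and its names (write_var_uint/read_var_uint) are illustrative rather than part of any ClickHouse API.

import io

def write_var_uint(x: int, out: bytearray) -> None:
    # Emit 7 bits per byte, low bits first; 0x80 marks "more bytes follow".
    for _ in range(9):
        byte = x & 0x7F
        if x > 0x7F:
            byte |= 0x80
        out.append(byte)
        x >>= 7
        if x == 0:
            return

def read_var_uint(buf: io.BytesIO) -> int:
    # Accumulate 7-bit groups until a byte without the continuation bit is seen.
    x = 0
    for i in range(9):
        byte = buf.read(1)[0]
        x |= (byte & 0x7F) << (7 * i)
        if not byte & 0x80:
            break
    return x

if __name__ == "__main__":
    for value in (0, 127, 128, 54449, 2**63 - 1):
        ba = bytearray()
        write_var_uint(value, ba)
        assert read_var_uint(io.BytesIO(bytes(ba))) == value
    print("VarUInt round trip OK")

The 9-iteration cap is what bounds the encoding to 63 usable bits; the removed helper applied the same limit when serializing packet fields such as the version/revision values in sendHello (21, 9, 54449).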