From f37f228af9d70a04166178bac24127693d8b8c35 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 24 Jul 2024 13:06:29 +0000 Subject: [PATCH 001/132] Improve columns squashing for String/Array/Map/Variant/Dynamic types --- src/Columns/ColumnArray.cpp | 16 +++ src/Columns/ColumnArray.h | 1 + src/Columns/ColumnDynamic.cpp | 110 ++++++++++++++++++ src/Columns/ColumnDynamic.h | 2 + src/Columns/ColumnMap.cpp | 9 ++ src/Columns/ColumnMap.h | 1 + src/Columns/ColumnNullable.cpp | 16 +++ src/Columns/ColumnNullable.h | 1 + src/Columns/ColumnString.cpp | 15 +++ src/Columns/ColumnString.h | 1 + src/Columns/ColumnTuple.cpp | 13 +++ src/Columns/ColumnTuple.h | 1 + src/Columns/ColumnVariant.cpp | 21 +++- src/Columns/ColumnVariant.h | 1 + src/Columns/IColumn.h | 9 ++ src/Interpreters/Squashing.cpp | 27 +++-- tests/performance/insert_select_squashing.xml | 23 ++++ .../03210_dynamic_squashing.reference | 8 ++ .../0_stateless/03210_dynamic_squashing.sql | 20 ++++ 19 files changed, 285 insertions(+), 10 deletions(-) create mode 100644 tests/performance/insert_select_squashing.xml create mode 100644 tests/queries/0_stateless/03210_dynamic_squashing.reference create mode 100644 tests/queries/0_stateless/03210_dynamic_squashing.sql diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 19cce678cc7..9244d75a04d 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -452,6 +452,22 @@ void ColumnArray::reserve(size_t n) getData().reserve(n); /// The average size of arrays is not taken into account here. Or it is considered to be no more than 1. } +void ColumnArray::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + Columns source_data_columns; + source_data_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_array_column = assert_cast(*source_column); + new_size += source_array_column.size(); + source_data_columns.push_back(source_array_column.getDataPtr()); + } + + getOffsets().reserve_exact(new_size); + data->prepareForSquashing(source_data_columns); +} + void ColumnArray::shrinkToFit() { getOffsets().shrink_to_fit(); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 63affb86d9d..d6f71b72940 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -118,6 +118,7 @@ public: void updatePermutationWithCollation(const Collator & collator, PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a92d54dd675..74b7ef69d8d 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -643,6 +643,116 @@ ColumnPtr ColumnDynamic::compress() const }); } +void ColumnDynamic::prepareForSquashing(const Columns & source_columns) +{ + if (source_columns.empty()) + return; + + /// Internal variants of source dynamic columns may differ. + /// We want to preallocate memory for all variants we will have after squashing. + /// It may happen that the total number of variants in source columns will + /// exceed the limit, in this case we will choose the most frequent variants. + + /// First, preallocate memory for variant discriminators and offsets. + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + auto & variant_col = getVariantColumn(); + variant_col.getLocalDiscriminators().reserve_exact(new_size); + variant_col.getOffsets().reserve_exact(new_size); + + /// Second, collect all variants and their total sizes. + std::unordered_map total_variant_sizes; + DataTypes all_variants; + + auto add_variants = [&](const ColumnDynamic & source_dynamic) + { + const auto & source_variant_column = source_dynamic.getVariantColumn(); + const auto & source_variant_info = source_dynamic.getVariantInfo(); + const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); + + for (size_t i = 0; i != source_variants.size(); ++i) + { + const auto & variant_name = source_variant_info.variant_names[i]; + auto it = total_variant_sizes.find(variant_name); + /// Add this variant to the list of all variants if we didn't see it yet. + if (it == total_variant_sizes.end()) + { + all_variants.push_back(source_variants[i]); + it = total_variant_sizes.emplace(variant_name, 0).first; + } + + it->second += source_variant_column.getVariantByGlobalDiscriminator(i).size(); + } + }; + + for (const auto & source_column : source_columns) + add_variants(assert_cast(*source_column)); + + /// Add variants from this dynamic column. + add_variants(*this); + + DataTypePtr result_variant_type; + /// Check if the number of all variants exceeds the limit. + if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_variant_sizes.contains("String"))) + { + /// We want to keep the most frequent variants in the resulting dynamic column. + DataTypes result_variants; + result_variants.reserve(max_dynamic_types); + /// Add variants from current variant column as we will not rewrite it. + for (const auto & variant : assert_cast(*variant_info.variant_type).getVariants()) + result_variants.push_back(variant); + /// Add String variant in advance (if we didn't add it yet) as we must have it across variants when we reach the limit. + if (!variant_info.variant_name_to_discriminator.contains("String")) + result_variants.push_back(std::make_shared()); + + /// Create list of remaining variants with their sizes and sort it. + std::vector> variants_with_sizes; + variants_with_sizes.reserve(all_variants.size() - variant_info.variant_names.size()); + for (const auto & variant : all_variants) + { + /// Add variant to the list only of we didn't add it yet. + auto variant_name = variant->getName(); + if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) + variants_with_sizes.emplace_back(total_variant_sizes[variant->getName()], variant); + } + + std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); + /// Add the most frequent variants until we reach max_dynamic_types. + size_t num_new_variants = max_dynamic_types - result_variants.size(); + for (size_t i = 0; i != num_new_variants; ++i) + result_variants.push_back(variants_with_sizes[i].second); + + result_variant_type = std::make_shared(result_variants); + } + else + { + result_variant_type = std::make_shared(all_variants); + } + + if (!result_variant_type->equals(*variant_info.variant_type)) + updateVariantInfoAndExpandVariantColumn(result_variant_type); + + /// Now current dynamic column has all resulting variants and we can call + /// prepareForSquashing on them to preallocate the memory. + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + Columns source_variant_columns; + source_variant_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_dynamic_column = assert_cast(*source_column); + const auto & source_variant_info = source_dynamic_column.getVariantInfo(); + /// Try to find this variant in the current source column. + auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]); + if (it != source_variant_info.variant_name_to_discriminator.end()) + source_variant_columns.push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second)); + } + + variant_col.getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns); + } +} + void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { if (!empty()) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index e92cabd3db9..cb3a896d2cb 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -254,6 +254,8 @@ public: variant_column->reserve(n); } + void prepareForSquashing(const Columns & source_columns) override; + void ensureOwnership() override { variant_column->ensureOwnership(); diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 1025b4e77b9..3bab20dfbf2 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -249,6 +249,15 @@ void ColumnMap::reserve(size_t n) nested->reserve(n); } +void ColumnMap::prepareForSquashing(const Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested->prepareForSquashing(nested_source_columns); +} + void ColumnMap::shrinkToFit() { nested->shrinkToFit(); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 3eaaa0ad562..191476839f1 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -94,6 +94,7 @@ public: void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 6529f0b78db..2a25cac6461 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -706,6 +706,22 @@ void ColumnNullable::reserve(size_t n) getNullMapData().reserve(n); } +void ColumnNullable::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + const auto & source_nullable_column = assert_cast(*source_column); + new_size += source_nullable_column.size(); + nested_source_columns.push_back(source_nullable_column.getNestedColumnPtr()); + } + + nested_column->prepareForSquashing(nested_source_columns); + getNullMapData().reserve(new_size); +} + void ColumnNullable::shrinkToFit() { getNestedColumn().shrinkToFit(); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index fe9f5b6dcc2..2c32e0fe5a0 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -125,6 +125,7 @@ public: size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 7cfa2571f5a..9ed2c7e3d4d 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -557,6 +557,21 @@ void ColumnString::reserve(size_t n) offsets.reserve_exact(n); } +void ColumnString::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + size_t new_chars_size = chars.size(); + for (const auto & source_column : source_columns) + { + const auto & source_string_column = assert_cast(*source_column); + new_size += source_string_column.size(); + new_chars_size += source_string_column.chars.size(); + } + + offsets.reserve_exact(new_size); + chars.reserve_exact(new_chars_size); +} + void ColumnString::shrinkToFit() { chars.shrink_to_fit(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c1012e1e55e..20cd950fe9b 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -283,6 +283,7 @@ public: ColumnPtr compress() const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void getExtremes(Field & min, Field & max) const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 4fc3f88a87c..c6ee7d775ae 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -595,6 +595,19 @@ void ColumnTuple::reserve(size_t n) getColumn(i).reserve(n); } +void ColumnTuple::prepareForSquashing(const Columns & source_columns) +{ + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) + { + Columns nested_columns; + nested_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_columns.push_back(assert_cast(*source_column).getColumnPtr(i)); + getColumn(i).prepareForSquashing(nested_columns); + } +} + void ColumnTuple::shrinkToFit() { const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 16b47a993f6..ef396d6a130 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -110,6 +110,7 @@ public: void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void shrinkToFit() override; void ensureOwnership() override; size_t byteSize() const override; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index de7efb41d19..68e19861c38 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1247,8 +1247,25 @@ void ColumnVariant::updatePermutation(IColumn::PermutationSortDirection directio void ColumnVariant::reserve(size_t n) { - local_discriminators->reserve(n); - offsets->reserve(n); + getLocalDiscriminators().reserve_exact(n); + getOffsets().reserve_exact(n); +} + +void ColumnVariant::prepareForSquashing(const Columns & source_columns) +{ + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + reserve(new_size); + + for (size_t i = 0; i != variants.size(); ++i) + { + Columns source_variant_columns; + source_variant_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + source_variant_columns.push_back(assert_cast(*source_column).getVariantPtrByGlobalDiscriminator(i)); + getVariantByGlobalDiscriminator(i).prepareForSquashing(source_variant_columns); + } } void ColumnVariant::ensureOwnership() diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 34c24b5428d..737eb27abfe 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -237,6 +237,7 @@ public: size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; void reserve(size_t n) override; + void prepareForSquashing(const Columns & source_columns) override; void ensureOwnership() override; size_t byteSize() const override; size_t byteSizeAt(size_t n) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index f9c1a3e7034..edcb9f0bc30 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -475,6 +475,15 @@ public: /// It affects performance only (not correctness). virtual void reserve(size_t /*n*/) {} + /// Reserve memory before squashing all specified source columns into this column. + virtual void prepareForSquashing(const std::vector & source_columns) + { + size_t new_size = size(); + for (const auto & source_column : source_columns) + new_size += source_column->size(); + reserve(new_size); + } + /// Requests the removal of unused capacity. /// It is a non-binding request to reduce the capacity of the underlying container to its size. virtual void shrinkToFit() {} diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 488177c3b4f..5cd40974c45 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -114,20 +113,32 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl { auto & first_chunk = input_chunks[0]; Columns columns = first_chunk.detachColumns(); + mutable_columns.reserve(columns.size()); for (auto & column : columns) - { mutable_columns.push_back(IColumn::mutate(std::move(column))); - mutable_columns.back()->reserve(rows); - } } + size_t num_columns = mutable_columns.size(); + /// Collect the list of source columns for each column. + std::vector source_columns_list(num_columns, Columns{}); + for (size_t i = 0; i != num_columns; ++i) + source_columns_list[i].reserve(input_chunks.size() - 1); + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above { - Columns columns = input_chunks[i].detachColumns(); - for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) + auto columns = input_chunks[i].detachColumns(); + for (size_t j = 0; j != num_columns; ++j) + source_columns_list[j].emplace_back(std::move(columns[j])); + } + + for (size_t i = 0; i != num_columns; ++i) + { + /// We know all the data we will insert in advance and can make all necessary pre-allocations. + mutable_columns[i]->prepareForSquashing(source_columns_list[i]); + for (auto & source_column : source_columns_list[i]) { - const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); + auto column = std::move(source_column); + mutable_columns[i]->insertRangeFrom(*column, 0, column->size()); } } diff --git a/tests/performance/insert_select_squashing.xml b/tests/performance/insert_select_squashing.xml new file mode 100644 index 00000000000..4c2c88f3d22 --- /dev/null +++ b/tests/performance/insert_select_squashing.xml @@ -0,0 +1,23 @@ + + + 1000 + + + +CREATE TABLE squash_performance +( + s1 String, + s2 Nullable(String), + a1 Array(Array(String)), + a2 Array(Array(UInt32)), + m1 Map(String, Array(String)), + m2 Map(String, Array(UInt64)), + t Tuple(String, Array(String), Map(String, String)) +) +ENGINE = Null; + + + INSERT INTO squash_performance SELECT * FROM generateRandom(42) LIMIT 500000 + + DROP TABLE IF EXISTS squash_performance + diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.reference b/tests/queries/0_stateless/03210_dynamic_squashing.reference new file mode 100644 index 00000000000..4f5b5ba098c --- /dev/null +++ b/tests/queries/0_stateless/03210_dynamic_squashing.reference @@ -0,0 +1,8 @@ +Array(UInt8) +None +UInt64 +None +String +UInt64 +String +UInt64 diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.sql b/tests/queries/0_stateless/03210_dynamic_squashing.sql new file mode 100644 index 00000000000..23b47184e33 --- /dev/null +++ b/tests/queries/0_stateless/03210_dynamic_squashing.sql @@ -0,0 +1,20 @@ +set allow_experimental_dynamic_type = 1; +set max_block_size = 1000; + +drop table if exists test; + +create table test (d Dynamic) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000); +select distinct dynamicType(d) as type from test order by type; + +drop table test; +create table test (d Dynamic(max_types=2)) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000); +select distinct dynamicType(d) as type from test order by type; + +truncate table test; +insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000); +select distinct dynamicType(d) as type from test order by type; + +drop table test; + From 0d8a3f13e39c9e0be87c8459db9582c7c336617f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 26 Jul 2024 21:49:41 +0100 Subject: [PATCH 002/132] impl --- src/Processors/Sources/ShellCommandSource.cpp | 16 ++++++++++++---- .../test_executable_dictionary/test.py | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 55eaf67eb3b..1659287c227 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -8,13 +8,15 @@ #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include + #include +#include namespace DB { @@ -137,9 +139,15 @@ public: while (!bytes_read) { + LOG_TRACE( + getLogger("TimeoutReadBufferFromFileDescriptor"), + "Starting polling on descriptors ({}) with timeout {} ms", + fmt::join(std::span(pfds, pfds + num_pfds) | std::views::transform([](const auto & pollfd) { return pollfd.fd; }), ", "), + timeout_milliseconds); pfds[0].revents = 0; pfds[1].revents = 0; size_t num_events = pollWithTimeout(pfds, num_pfds, timeout_milliseconds); + LOG_TRACE(getLogger("TimeoutReadBufferFromFileDescriptor"), "Poll returned with num_events={}", num_events); if (0 == num_events) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Pipe read timeout exceeded {} milliseconds", timeout_milliseconds); diff --git a/tests/integration/test_executable_dictionary/test.py b/tests/integration/test_executable_dictionary/test.py index 22f3442bb95..a1de429a235 100644 --- a/tests/integration/test_executable_dictionary/test.py +++ b/tests/integration/test_executable_dictionary/test.py @@ -245,6 +245,7 @@ def test_executable_input_slow_python(started_cluster): ) +@pytest.mark.repeat(50) def test_executable_implicit_input_slow_python(started_cluster): skip_test_msan(node) assert node.query_and_get_error( From 6317979825794882905bc02b3a18dd82cfd8ec1c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 29 Jul 2024 20:53:11 +0100 Subject: [PATCH 003/132] add one more --- tests/integration/test_executable_table_function/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_executable_table_function/test.py b/tests/integration/test_executable_table_function/test.py index 801a3c7c14a..a79616fc008 100644 --- a/tests/integration/test_executable_table_function/test.py +++ b/tests/integration/test_executable_table_function/test.py @@ -139,6 +139,7 @@ def test_executable_function_input_signalled_python(started_cluster): assert node.query(query.format(source="(SELECT id FROM test_data_table)")) == "" +@pytest.mark.repeat(50) def test_executable_function_input_slow_python(started_cluster): skip_test_msan(node) From d3830e0a4fd96c286e5b91a78b1a9583f5eb6291 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Jul 2024 14:54:42 +0000 Subject: [PATCH 004/132] Add perf tests for Dynamic and Variant --- .../insert_select_squashing_dynamic.xml | 59 +++++++++++++++++++ .../insert_select_squashing_variant.xml | 34 +++++++++++ 2 files changed, 93 insertions(+) create mode 100644 tests/performance/insert_select_squashing_dynamic.xml create mode 100644 tests/performance/insert_select_squashing_variant.xml diff --git a/tests/performance/insert_select_squashing_dynamic.xml b/tests/performance/insert_select_squashing_dynamic.xml new file mode 100644 index 00000000000..f7f600fd8bd --- /dev/null +++ b/tests/performance/insert_select_squashing_dynamic.xml @@ -0,0 +1,59 @@ + + + 1000 + 0 + 1 + + + +CREATE TABLE dynamic_squash_performance_1 +( + d Dynamic +) +ENGINE = Null; + + + +CREATE TABLE dynamic_squash_performance_2 +( + d Dynamic(max_types=6) +) +ENGINE = Null; + + + +CREATE TABLE src_dynamic_squash_performance_1 +( + d Dynamic +) +ENGINE = Memory; + + + +CREATE TABLE src_dynamic_squash_performance_2 +( + d Dynamic(max_types=6) +) +ENGINE = Memory; + + + + + + + + + + + + INSERT INTO dynamic_squash_performance_1 SELECT number::Dynamic FROM numbers(10000000) + INSERT INTO dynamic_squash_performance_1 SELECT range(number % 100)::Dynamic FROM numbers(2000000) + INSERT INTO dynamic_squash_performance_1 SELECT * FROM src_dynamic_squash_performance_1 + INSERT INTO dynamic_squash_performance_2 SELECT * FROM src_dynamic_squash_performance_2 + + DROP TABLE IF EXISTS dynamic_squash_performance_1 + DROP TABLE IF EXISTS dynamic_squash_performance_2 + DROP TABLE IF EXISTS src_dynamic_squash_performance_1 + DROP TABLE IF EXISTS src_dynamic_squash_performance_2 + + diff --git a/tests/performance/insert_select_squashing_variant.xml b/tests/performance/insert_select_squashing_variant.xml new file mode 100644 index 00000000000..5c59fc7b50f --- /dev/null +++ b/tests/performance/insert_select_squashing_variant.xml @@ -0,0 +1,34 @@ + + + 1000 + 0 + 1 + 1 + + + +CREATE TABLE variant_squash_performance +( + v Variant(Tuple(v1 Array(UInt64)), Tuple(v2 Array(UInt64)), Tuple(v3 Array(UInt64)), Tuple(v4 Array(UInt64)), Tuple(v5 Array(UInt64))) +) +ENGINE = Null; + + + +CREATE TABLE src_variant_squash_performance +( + v Variant(Tuple(v1 Array(UInt64)), Tuple(v2 Array(UInt64)), Tuple(v3 Array(UInt64)), Tuple(v4 Array(UInt64)), Tuple(v5 Array(UInt64))) +) +ENGINE = Memory; + + + + + + + INSERT INTO variant_squash_performance SELECT * FROM src_variant_squash_performance + + DROP TABLE IF EXISTS variant_squash_performance + DROP TABLE IF EXISTS src_variant_squash_performance + + From 08ecf6c6642f7fba6f5503f5121c832b982de945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 30 Jul 2024 20:07:37 +0000 Subject: [PATCH 005/132] Reset formatter after buffer is restarted Formatter can use buffer to initialize some of its members. If buffer is restarted after those members are initialized, then the buffer restart might invalidate pointers/references/iterators hold into buffer. --- src/Storages/MessageQueueSink.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..d4dabd60ef8 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -60,12 +60,12 @@ void MessageQueueSink::consume(Chunk & chunk) row_format->writeRow(columns, row); } row_format->finalize(); - row_format->resetFormatter(); producer->produce(buffer->str(), i, columns, row - 1); /// Reallocate buffer if it's capacity is large then DBMS_DEFAULT_BUFFER_SIZE, /// because most likely in this case we serialized abnormally large row /// and won't need this large allocated buffer anymore. buffer->restart(DBMS_DEFAULT_BUFFER_SIZE); + row_format->resetFormatter(); } } else @@ -73,8 +73,8 @@ void MessageQueueSink::consume(Chunk & chunk) format->write(getHeader().cloneWithColumns(chunk.detachColumns())); format->finalize(); producer->produce(buffer->str(), chunk.getNumRows(), columns, chunk.getNumRows() - 1); - format->resetFormatter(); buffer->restart(); + format->resetFormatter(); } } From e7fc206069796f662bb31aab671cbf75b95984ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 31 Jul 2024 17:20:01 +0000 Subject: [PATCH 006/132] Add test --- .../format_schemas/string_key_value.capnp | 6 + .../format_schemas/string_key_value.format | 1 + .../format_schemas/string_key_value.proto | 6 + .../clickhouse_path/format_schemas/test.capnp | 2 +- tests/integration/test_storage_kafka/test.py | 129 ++++++++++++++++++ 5 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp new file mode 100644 index 00000000000..4f3eabe22f0 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp @@ -0,0 +1,6 @@ +@0x99f75f775fe63dae; + +struct StringKeyValuePair { + key@0 : Text; + value@1 : Text; +} diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format new file mode 100644 index 00000000000..83dff6ce401 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format @@ -0,0 +1 @@ +(key = ${key:CSV}, value = ${value:CSV}) diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto new file mode 100644 index 00000000000..71905c63bdf --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message StringKeyValuePair { + string key = 1; + string value = 2; +} diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp index 44f1961205b..247e7b9ceca 100644 --- a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp @@ -7,4 +7,4 @@ struct TestRecordStruct val1 @2 : Text; val2 @3 : Float32; val3 @4 : UInt8; -} \ No newline at end of file +} diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 8393e88db88..596933c1566 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -5097,6 +5097,135 @@ def test_multiple_read_in_materialized_views(kafka_cluster, max_retries=15): ) +def test_kafka_produce_http_interface_row_based_format(kafka_cluster): + # reproduction of #https://github.com/ClickHouse/ClickHouse/issues/61060 with validating the written messages + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + + topic_prefix = "http_row_" + + # It is important to have: + # - long enough messages + # - enough messages + # I don't know the exact requirement for message sizes, but it doesn't reproduce with short messages + # For the number of messages it seems like at least 3 messages is necessary + expected_key = "01234567890123456789" + expected_value = "aaaaabbbbbccccc" + + insert_query_end = f"(key, value) VALUES ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}')" + insert_query_template = "INSERT INTO {table_name} " + insert_query_end + + extra_settings = { + "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", + "CapnProto": ", kafka_schema='string_key_value:StringKeyValuePair'", + "Template": ", format_template_row='string_key_value.format'" + } + + # Only the formats that can be used both and input and output format are tested + # Reasons to exclude following formats: + # - JSONStrings: not actually an input format + # - ProtobufSingle: I cannot make it work to parse the messages. Probably something is broken, + # because the producer can write multiple rows into a same message, which makes them impossible to parse properly. Should added after #67549 is fixed. + # - ProtobufList: I didn't want to deal with the envelope and stuff + # - Npy: supports only single column + # - LineAsString: supports only single column + # - RawBLOB: supports only single column + formats_to_test = [ + "TabSeparated", + "TabSeparatedRaw", + "TabSeparatedWithNames", + "TabSeparatedWithNamesAndTypes", + "TabSeparatedRawWithNames", + "TabSeparatedRawWithNamesAndTypes", + "Template", + "CSV", + "CSVWithNames", + "CSVWithNamesAndTypes", + "CustomSeparated", + "CustomSeparatedWithNames", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONColumns", + "JSONColumnsWithMetadata", + "JSONCompact", + "JSONCompactColumns", + "JSONEachRow", + "JSONStringsEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNames", + "JSONCompactEachRowWithNamesAndTypes", + "JSONCompactStringsEachRow", + "JSONCompactStringsEachRowWithNames", + "JSONCompactStringsEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "BSONEachRow", + "TSKV", + "Protobuf", + "Avro", + "Parquet", + "Arrow", + "ArrowStream", + "ORC", + "RowBinary", + "RowBinaryWithNames", + "RowBinaryWithNamesAndTypes", + "Native", + "CapnProto", + "MsgPack", + ] + for format in formats_to_test: + logging.debug(f"Creating tables and writing messages to {format}") + topic = topic_prefix + format + kafka_create_topic(admin_client, topic) + + extra_setting = extra_settings.get(format, "") + + # kafka_max_rows_per_message is set to 2 to make sure every format produces at least 2 messages, thus increasing the chance of catching a bug + instance.query( + f""" + DROP TABLE IF EXISTS test.view_{topic}; + DROP TABLE IF EXISTS test.consumer_{topic}; + CREATE TABLE test.kafka_writer_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}', + kafka_max_rows_per_message = 2 {extra_setting}; + + CREATE TABLE test.kafka_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}' {extra_setting}; + + CREATE MATERIALIZED VIEW test.view_{topic} Engine=Log AS + SELECT key, value FROM test.kafka_{topic}; + """ + ) + + instance.http_query(insert_query_template.format(table_name="test.kafka_writer_"+topic), method="POST") + + expected = f"""\ +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +""" + # give some times for the readers to read the messages + for format in formats_to_test: + logging.debug(f"Checking result for {format}") + topic = topic_prefix + format + + result = instance.query_with_retry(f"SELECT * FROM test.view_{topic}", check_callback=lambda res: res.count("\n") == 3) + + assert TSV(result) == TSV(expected) + + kafka_delete_topic(admin_client, topic) + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From bada9ea9be4888ab4ef1b3f7fcdf015e11d994d1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 17:30:24 +0000 Subject: [PATCH 007/132] Automatic style fix --- tests/integration/test_storage_kafka/test.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 596933c1566..dd0bf1bf28f 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -5103,7 +5103,6 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster): bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) - topic_prefix = "http_row_" # It is important to have: @@ -5120,7 +5119,7 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster): extra_settings = { "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", "CapnProto": ", kafka_schema='string_key_value:StringKeyValuePair'", - "Template": ", format_template_row='string_key_value.format'" + "Template": ", format_template_row='string_key_value.format'", } # Only the formats that can be used both and input and output format are tested @@ -5208,7 +5207,10 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster): """ ) - instance.http_query(insert_query_template.format(table_name="test.kafka_writer_"+topic), method="POST") + instance.http_query( + insert_query_template.format(table_name="test.kafka_writer_" + topic), + method="POST", + ) expected = f"""\ {expected_key}\t{expected_value} @@ -5220,12 +5222,16 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster): logging.debug(f"Checking result for {format}") topic = topic_prefix + format - result = instance.query_with_retry(f"SELECT * FROM test.view_{topic}", check_callback=lambda res: res.count("\n") == 3) + result = instance.query_with_retry( + f"SELECT * FROM test.view_{topic}", + check_callback=lambda res: res.count("\n") == 3, + ) assert TSV(result) == TSV(expected) kafka_delete_topic(admin_client, topic) + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From a80c7c080cdbb3bec4662501686133298b7a4a2d Mon Sep 17 00:00:00 2001 From: Jacob Reckhard Date: Thu, 1 Aug 2024 18:23:11 -0600 Subject: [PATCH 008/132] Added support for reading multilinestring wkts --- docs/en/sql-reference/data-types/geo.md | 42 ++++++++++++ .../en/sql-reference/functions/geo/polygon.md | 42 ++++++++++-- src/DataTypes/DataTypeCustomGeo.cpp | 7 ++ src/DataTypes/DataTypeCustomGeo.h | 6 ++ src/Functions/geometryConverters.h | 64 +++++++++++++++++++ src/Functions/polygonsIntersection.cpp | 2 + src/Functions/polygonsSymDifference.cpp | 2 + src/Functions/polygonsUnion.cpp | 2 + src/Functions/polygonsWithin.cpp | 2 + src/Functions/readWkt.cpp | 30 +++++++++ .../03215_multilinestring_geometry.reference | 17 +++++ .../03215_multilinestring_geometry.sql | 12 ++++ .../aspell-ignore/en/aspell-dict.txt | 4 +- 13 files changed, 227 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03215_multilinestring_geometry.reference create mode 100644 tests/queries/0_stateless/03215_multilinestring_geometry.sql diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 7ffc7447d96..8ce53bb2ef2 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -52,6 +52,48 @@ Result: └───────────────────────────────┴───────────────┘ ``` +## LineString + +`LineString` is a line stored as an array of points: [Array](array.md)([Point](#point)). + +**Example** + +Query: + +```sql +CREATE TABLE geo_linestring (l LineString) ENGINE = Memory(); +INSERT INTO geo_linestring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]); +SELECT l, toTypeName(l) FROM geo_linestring; +``` +Result: + +``` text +┌─r─────────────────────────────┬─toTypeName(r)─┐ +│ [(0,0),(10,0),(10,10),(0,10)] │ LineString │ +└───────────────────────────────┴───────────────┘ +``` + +## MultiLineString + +`MultiLineString` is multiple lines stored as an array of `LineString`: [Array](array.md)([LineString](#linestring)). + +**Example** + +Query: + +```sql +CREATE TABLE geo_multilinestring (l MultiLineString) ENGINE = Memory(); +INSERT INTO geo_multilinestring VALUES([[(0, 0), (10, 0), (10, 10), (0, 10)], [(1, 1), (2, 2), (3, 3)]]); +SELECT l, toTypeName(l) FROM geo_multilinestring; +``` +Result: + +``` text +┌─l───────────────────────────────────────────────────┬─toTypeName(l)───┐ +│ [[(0,0),(10,0),(10,10),(0,10)],[(1,1),(2,2),(3,3)]] │ MultiLineString │ +└─────────────────────────────────────────────────────┴─────────────────┘ +``` + ## Polygon `Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes. diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index 25a7a1fac8e..c054e05d39c 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -6,11 +6,13 @@ title: "Functions for Working with Polygons" ## WKT -Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are: +Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are: - POINT - POLYGON - MULTIPOLYGON +- LINESTRING +- MULTILINESTRING **Syntax** @@ -26,12 +28,16 @@ WKT(geo_data) - [Ring](../../data-types/geo.md#ring) - [Polygon](../../data-types/geo.md#polygon) - [MultiPolygon](../../data-types/geo.md#multipolygon) +- [LineString](../../data-types/geo.md#linestring) +- [MultiLineString](../../data-types/geo.md#multilinestring) **Returned value** - WKT geometric object `POINT` is returned for a Point. - WKT geometric object `POLYGON` is returned for a Polygon -- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon. +- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon. +- WKT geometric object `LINESTRING` is returned for a LineString. +- WKT geometric object `MULTILINESTRING` is returned for a MultiLineString. **Examples** @@ -84,7 +90,7 @@ SELECT ### Input parameters -String starting with `MULTIPOLYGON` +String starting with `MULTIPOLYGON` ### Returned value @@ -170,6 +176,34 @@ SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)'); [(1,1),(2,2),(3,3),(1,1)] ``` +## readWKTMultiLineString + +Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format. + +### Syntax + +```sql +readWKTMultiLineString(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a MultiLineString geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the multilinestring geometry. + +### Example + +```sql +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))')); +``` + +```response +[[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]] +``` + ## readWKTRing Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format. @@ -219,7 +253,7 @@ UInt8, 0 for false, 1 for true ## polygonsDistanceSpherical -Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise. +Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise. ### Example diff --git a/src/DataTypes/DataTypeCustomGeo.cpp b/src/DataTypes/DataTypeCustomGeo.cpp index 0736d837d46..d72787647c3 100644 --- a/src/DataTypes/DataTypeCustomGeo.cpp +++ b/src/DataTypes/DataTypeCustomGeo.cpp @@ -24,6 +24,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory) std::make_unique(std::make_unique())); }); + // Custom type for mulitple lines stored as Array(LineString) + factory.registerSimpleDataTypeCustom("MultiLineString", [] + { + return std::make_pair(DataTypeFactory::instance().get("Array(LineString)"), + std::make_unique(std::make_unique())); + }); + // Custom type for simple polygon without holes stored as Array(Point) factory.registerSimpleDataTypeCustom("Ring", [] { diff --git a/src/DataTypes/DataTypeCustomGeo.h b/src/DataTypes/DataTypeCustomGeo.h index 0a1c83e4638..6a632f0d05c 100644 --- a/src/DataTypes/DataTypeCustomGeo.h +++ b/src/DataTypes/DataTypeCustomGeo.h @@ -17,6 +17,12 @@ public: DataTypeLineStringName() : DataTypeCustomFixedName("LineString") {} }; +class DataTypeMultiLineStringName : public DataTypeCustomFixedName +{ +public: + DataTypeMultiLineStringName() : DataTypeCustomFixedName("MultiLineString") {} +}; + class DataTypeRingName : public DataTypeCustomFixedName { public: diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index 03831d37e0c..bf975017a6d 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -31,6 +31,9 @@ namespace ErrorCodes template using LineString = boost::geometry::model::linestring; +template +using MultiLineString = boost::geometry::model::multi_linestring>; + template using Ring = boost::geometry::model::ring; @@ -42,12 +45,14 @@ using MultiPolygon = boost::geometry::model::multi_polygon>; using CartesianPoint = boost::geometry::model::d2::point_xy; using CartesianLineString = LineString; +using CartesianMultiLineString = MultiLineString; using CartesianRing = Ring; using CartesianPolygon = Polygon; using CartesianMultiPolygon = MultiPolygon; using SphericalPoint = boost::geometry::model::point>; using SphericalLineString = LineString; +using SphericalMultiLineString = MultiLineString; using SphericalRing = Ring; using SphericalPolygon = Polygon; using SphericalMultiPolygon = MultiPolygon; @@ -113,6 +118,28 @@ struct ColumnToLineStringsConverter } }; +/** + * Class which converts Column with type Array(Array(Tuple(Float64, Float64))) to a vector of boost multi_linestring type. +*/ +template +struct ColumnToMultiLineStringsConverter +{ + static std::vector> convert(ColumnPtr col) + { + const IColumn::Offsets & offsets = typeid_cast(*col).getOffsets(); + size_t prev_offset = 0; + std::vector> answer(offsets.size()); + auto all_linestrings = ColumnToLineStringsConverter::convert(typeid_cast(*col).getDataPtr()); + for (size_t iter = 0; iter < offsets.size() && iter < all_linestrings.size(); ++iter) + { + for (size_t linestring_iter = prev_offset; linestring_iter < offsets[iter]; ++linestring_iter) + answer[iter].emplace_back(std::move(all_linestrings[linestring_iter])); + prev_offset = offsets[iter]; + } + return answer; + } +}; + /** * Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost ring type. */ @@ -268,6 +295,38 @@ private: ColumnUInt64::MutablePtr offsets; }; +/// Serialize Point, MultiLineString as MultiLineString +template +class MultiLineStringSerializer +{ +public: + MultiLineStringSerializer() + : offsets(ColumnUInt64::create()) + {} + + explicit MultiLineStringSerializer(size_t n) + : offsets(ColumnUInt64::create(n)) + {} + + void add(const MultiLineString & multilinestring) + { + size += multilinestring.size(); + offsets->insertValue(size); + for (const auto & linestring : multilinestring) + linestring_serializer.add(linestring); + } + + ColumnPtr finalize() + { + return ColumnArray::create(linestring_serializer.finalize(), std::move(offsets)); + } + +private: + size_t size = 0; + LineStringSerializer linestring_serializer; + ColumnUInt64::MutablePtr offsets; +}; + /// Almost the same as LineStringSerializer /// Serialize Point, Ring as Ring template @@ -411,6 +470,11 @@ static void callOnGeometryDataType(DataTypePtr type, F && f) else if (factory.get("LineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "LineString") return f(ConverterType>()); + /// We should take the name into consideration to avoid ambiguity. + /// Because for example both MultiLineString and Polygon are resolved to Array(Tuple(Point)). + else if (factory.get("MultiLineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "MultiLineString") + return f(ConverterType>()); + /// For backward compatibility if we call this function not on a custom type, we will consider Array(Tuple(Point)) as type Ring. else if (factory.get("Ring")->equals(*type)) return f(ConverterType>()); diff --git a/src/Functions/polygonsIntersection.cpp b/src/Functions/polygonsIntersection.cpp index 329242e762e..43ab03f8c1f 100644 --- a/src/Functions/polygonsIntersection.cpp +++ b/src/Functions/polygonsIntersection.cpp @@ -75,6 +75,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index 3c219d0facb..6faec95bb7b 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -73,6 +73,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index 969eb2f78fb..5378ff636f8 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -73,6 +73,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index c63ad5ef868..dacd1c0e18f 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -77,6 +77,8 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be LineString", getName()); + else if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be MultiLineString", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index eb262777b0d..2010b5167e7 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -87,6 +87,11 @@ struct ReadWKTLineStringNameHolder static constexpr const char * name = "readWKTLineString"; }; +struct ReadWKTMultiLineStringNameHolder +{ + static constexpr const char * name = "readWKTMultiLineString"; +}; + struct ReadWKTRingNameHolder { static constexpr const char * name = "readWKTRing"; @@ -131,6 +136,31 @@ Parses a Well-Known Text (WKT) representation of a LineString geometry and retur }, .categories{"Unique identifiers"} }); + factory.registerFunction, ReadWKTMultiLineStringNameHolder>>(FunctionDocumentation + { + .description=R"( +Parses a Well-Known Text (WKT) representation of a MultiLineString geometry and returns it in the internal ClickHouse format. +)", + .syntax = "readWKTMultiLineString(wkt_string)", + .arguments{ + {"wkt_string", "The input WKT string representing a MultiLineString geometry."} + }, + .returned_value = "The function returns a ClickHouse internal representation of the multilinestring geometry.", + .examples{ + {"first call", "SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))');", R"( +┌─readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))')─┐ +│ [[(1,1),(2,2),(3,3)],[(4,4),(5,5),(6,6)]] │ +└──────────────────────────────────────────────────────────────────────────────┘ + + )"}, + {"second call", "SELECT toTypeName(readWKTLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'));", R"( +┌─toTypeName(readWKTLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'))─┐ +│ MultiLineString │ +└─────────────────────────────────────────────────────────────────────────┘ + )"}, + }, + .categories{"Unique identifiers"} + }); factory.registerFunction, ReadWKTRingNameHolder>>(); factory.registerFunction, ReadWKTPolygonNameHolder>>(); factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.reference b/tests/queries/0_stateless/03215_multilinestring_geometry.reference new file mode 100644 index 00000000000..f4c5774018e --- /dev/null +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.reference @@ -0,0 +1,17 @@ +-- { echoOn } +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +[[(1,1),(2,2),(3,3),(1,1)]] +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); +MultiLineString +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); +MULTILINESTRING((1 1,2 2,3 3,1 1)) +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +[[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); +MultiLineString +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); +MULTILINESTRING((1 1,2 2,3 3,1 1),(1 0,2 0,3 0)) +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x +SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); +POLYGON((1 1,2 2,3 3,1 1)) String [[(1,1),(2,2),(3,3),(1,1)]] Polygon diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.sql b/tests/queries/0_stateless/03215_multilinestring_geometry.sql new file mode 100644 index 00000000000..71344920c52 --- /dev/null +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.sql @@ -0,0 +1,12 @@ +-- { echoOn } +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))')); + +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +SELECT toTypeName(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); +SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))')); + +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x +SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index b21ae0764c6..3d7e77f213d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2942 +personal_ws-1.1 en 2942 AArch ACLs ALTERs @@ -561,6 +561,7 @@ MindsDB Mongodb Monotonicity MsgPack +MultiLineString MultiPolygon Multiline Multiqueries @@ -2361,6 +2362,7 @@ rankCorr rapidjson rawblob readWKTLineString +readWKTMultiLineString readWKTMultiPolygon readWKTPoint readWKTPolygon From 51b39a6c745d61cb2e6feb39659ddb3cac57ad03 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 1 Aug 2024 18:24:45 +0100 Subject: [PATCH 009/132] some more --- tests/integration/test_executable_dictionary/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_executable_dictionary/test.py b/tests/integration/test_executable_dictionary/test.py index a1de429a235..2a6af75e751 100644 --- a/tests/integration/test_executable_dictionary/test.py +++ b/tests/integration/test_executable_dictionary/test.py @@ -235,6 +235,7 @@ def test_executable_implicit_input_signalled_python(started_cluster): ) +@pytest.mark.repeat(50) def test_executable_input_slow_python(started_cluster): skip_test_msan(node) assert node.query_and_get_error( From 51918dc080c9fa4b128a151cfbd0d28b294c56d3 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 3 Aug 2024 00:24:00 +0100 Subject: [PATCH 010/132] impl --- src/Common/ShellCommand.cpp | 9 ++++- src/Processors/Sources/ShellCommandSource.cpp | 39 +++++++++++++------ 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 98a21b43d76..79b0d667863 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -237,7 +237,14 @@ std::unique_ptr ShellCommand::executeImpl( res->write_fds.emplace(fd, fds.fds_rw[1]); } - LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid); + LOG_TRACE( + getLogger(), + "Started shell command '{}' with pid {} and file descriptors: read {}, write {}", + filename, + pid, + res->out.getFD(), + res->err.getFD()); + return res; } diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 1659287c227..923bdfad8f8 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -75,6 +75,15 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond while (true) { Stopwatch watch; + +#if defined(DEBUG_OR_SANITIZER_BUILD) + auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; + LOG_TRACE( + getLogger("TimeoutReadBufferFromFileDescriptor"), + "Polling descriptors: {}", + fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", ")); +#endif + res = poll(pfds, static_cast(num), static_cast(timeout_milliseconds)); if (res < 0) @@ -84,7 +93,16 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond const auto elapsed = watch.elapsedMilliseconds(); if (timeout_milliseconds <= elapsed) + { +#if defined(DEBUG_OR_SANITIZER_BUILD) + LOG_TRACE( + getLogger("TimeoutReadBufferFromFileDescriptor"), + "Timeout exceeded: elapsed={}, timeout={}", + elapsed, + timeout_milliseconds); +#endif break; + } timeout_milliseconds -= elapsed; } else @@ -93,6 +111,15 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond } } +#if defined(DEBUG_OR_SANITIZER_BUILD) + auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; + LOG_TRACE( + getLogger("TimeoutReadBufferFromFileDescriptor"), + "Poll for descriptors: {} returned {}", + fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "), + res); +#endif + return res; } @@ -139,15 +166,9 @@ public: while (!bytes_read) { - LOG_TRACE( - getLogger("TimeoutReadBufferFromFileDescriptor"), - "Starting polling on descriptors ({}) with timeout {} ms", - fmt::join(std::span(pfds, pfds + num_pfds) | std::views::transform([](const auto & pollfd) { return pollfd.fd; }), ", "), - timeout_milliseconds); pfds[0].revents = 0; pfds[1].revents = 0; size_t num_events = pollWithTimeout(pfds, num_pfds, timeout_milliseconds); - LOG_TRACE(getLogger("TimeoutReadBufferFromFileDescriptor"), "Poll returned with num_events={}", num_events); if (0 == num_events) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Pipe read timeout exceeded {} milliseconds", timeout_milliseconds); @@ -208,12 +229,6 @@ public: return true; } - void reset() const - { - makeFdBlocking(stdout_fd); - makeFdBlocking(stderr_fd); - } - ~TimeoutReadBufferFromFileDescriptor() override { tryMakeFdBlocking(stdout_fd); From f816158dbc000c4fb02ba413f5b47349795995c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 5 Aug 2024 09:11:55 +0000 Subject: [PATCH 011/132] Address review comments --- src/Storages/MessageQueueSink.cpp | 4 ++-- tests/integration/test_storage_kafka/test.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index d4dabd60ef8..104e3506ade 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -46,6 +46,8 @@ void MessageQueueSink::consume(Chunk & chunk) if (columns.empty()) return; + /// The formatter might hold pointers to buffer (e.g. if PeekableWriteBuffer is used), which means the formatter + /// needs to be reset after buffer might reallocate its memory. In this exact case after restarting the buffer. if (row_format) { size_t row = 0; @@ -77,6 +79,4 @@ void MessageQueueSink::consume(Chunk & chunk) format->resetFormatter(); } } - - } diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index dd0bf1bf28f..8793ab72a16 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -5098,7 +5098,7 @@ def test_multiple_read_in_materialized_views(kafka_cluster, max_retries=15): def test_kafka_produce_http_interface_row_based_format(kafka_cluster): - # reproduction of #https://github.com/ClickHouse/ClickHouse/issues/61060 with validating the written messages + # reproduction of #61060 with validating the written messages admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) From cb6baefa948ef1270ce9454f72075ac10bf6e729 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 5 Aug 2024 21:49:36 +0100 Subject: [PATCH 012/132] better --- src/Common/ShellCommand.cpp | 2 +- src/Processors/Sources/ShellCommandSource.cpp | 12 +++--------- tests/integration/test_executable_dictionary/test.py | 2 -- .../test_executable_table_function/test.py | 1 - 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 79b0d667863..0d41669816c 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -239,7 +239,7 @@ std::unique_ptr ShellCommand::executeImpl( LOG_TRACE( getLogger(), - "Started shell command '{}' with pid {} and file descriptors: read {}, write {}", + "Started shell command '{}' with pid {} and file descriptors: out {}, err {}", filename, pid, res->out.getFD(), diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 923bdfad8f8..23359367a9b 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -76,13 +76,11 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond { Stopwatch watch; -#if defined(DEBUG_OR_SANITIZER_BUILD) auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; - LOG_TRACE( + LOG_TEST( getLogger("TimeoutReadBufferFromFileDescriptor"), "Polling descriptors: {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", ")); -#endif res = poll(pfds, static_cast(num), static_cast(timeout_milliseconds)); @@ -94,13 +92,11 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond const auto elapsed = watch.elapsedMilliseconds(); if (timeout_milliseconds <= elapsed) { -#if defined(DEBUG_OR_SANITIZER_BUILD) - LOG_TRACE( + LOG_TEST( getLogger("TimeoutReadBufferFromFileDescriptor"), "Timeout exceeded: elapsed={}, timeout={}", elapsed, timeout_milliseconds); -#endif break; } timeout_milliseconds -= elapsed; @@ -111,14 +107,12 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond } } -#if defined(DEBUG_OR_SANITIZER_BUILD) auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; - LOG_TRACE( + LOG_TEST( getLogger("TimeoutReadBufferFromFileDescriptor"), "Poll for descriptors: {} returned {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "), res); -#endif return res; } diff --git a/tests/integration/test_executable_dictionary/test.py b/tests/integration/test_executable_dictionary/test.py index 2a6af75e751..22f3442bb95 100644 --- a/tests/integration/test_executable_dictionary/test.py +++ b/tests/integration/test_executable_dictionary/test.py @@ -235,7 +235,6 @@ def test_executable_implicit_input_signalled_python(started_cluster): ) -@pytest.mark.repeat(50) def test_executable_input_slow_python(started_cluster): skip_test_msan(node) assert node.query_and_get_error( @@ -246,7 +245,6 @@ def test_executable_input_slow_python(started_cluster): ) -@pytest.mark.repeat(50) def test_executable_implicit_input_slow_python(started_cluster): skip_test_msan(node) assert node.query_and_get_error( diff --git a/tests/integration/test_executable_table_function/test.py b/tests/integration/test_executable_table_function/test.py index a79616fc008..801a3c7c14a 100644 --- a/tests/integration/test_executable_table_function/test.py +++ b/tests/integration/test_executable_table_function/test.py @@ -139,7 +139,6 @@ def test_executable_function_input_signalled_python(started_cluster): assert node.query(query.format(source="(SELECT id FROM test_data_table)")) == "" -@pytest.mark.repeat(50) def test_executable_function_input_slow_python(started_cluster): skip_test_msan(node) From 99b18d31db32a077678661bd9ba84fb52ff49333 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 14 Jul 2024 15:30:38 +0200 Subject: [PATCH 013/132] Update gdb to 15.1 (by compiling from sources) Right now there are couple of gdb bugs that makes CI unstable: - https://sourceware.org/bugzilla/show_bug.cgi?id=29185 - https://bugzilla.redhat.com/show_bug.cgi?id=1492496 But ubuntu 22.04 does not have 14+ anywhere, the ~ubuntu-toolchain-r/test contains only gdb 13, so there is no other options except for compiling it from sources. But there also other reasons to update it - optimizations, looks like older gdb versions does not use index fully - 5.6sec vs 56sec: # 15.1 $ time command gdb -batch -ex 'disas main' clickhouse ... real 0m5.692s user 0m29.948s sys 0m1.190s # 12.1 (from ubuntu 22.04) real 0m56.709s user 0m59.307s sys 0m0.585s Also note, that we cannot compile gdb in the fasttest (that contains compiler) since some images does not includes full toolchain, for instance gdb is added in the following images: - test-util -> test-base -> lots of other images (no toolchain) - performance-comparison (no toolchain) - integration-test (no toolchain) - integration-tests-runner (no toolchain) Signed-off-by: Azat Khuzhin --- docker/images.json | 4 +++ docker/packager/binary-builder/Dockerfile | 5 ++++ docker/packager/gdb/Dockerfile | 30 +++++++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 docker/packager/gdb/Dockerfile diff --git a/docker/images.json b/docker/images.json index 716b76ee217..eae4ad30a79 100644 --- a/docker/images.json +++ b/docker/images.json @@ -7,6 +7,10 @@ "name": "clickhouse/cctools", "dependent": [] }, + "docker/packager/gdb": { + "name": "clickhouse/gdb", + "dependent": [] + }, "docker/test/compatibility/centos": { "name": "clickhouse/test-old-centos", "dependent": [] diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index 7d6acdcd856..647ab8758a5 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -6,6 +6,11 @@ ENV CXX=clang++-${LLVM_VERSION} # If the cctools is updated, then first build it in the CI, then update here in a different commit COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools +# TODO: same for gdb and in other places as well +# +# NOTE: here it will add circular dependency but it will be fixed after [1] +# +# [1]: https://github.com/ClickHouse/ClickHouse/issues/66493 # Rust toolchain and libraries ENV RUSTUP_HOME=/rust/rustup diff --git a/docker/packager/gdb/Dockerfile b/docker/packager/gdb/Dockerfile new file mode 100644 index 00000000000..1b5c3bbcb2e --- /dev/null +++ b/docker/packager/gdb/Dockerfile @@ -0,0 +1,30 @@ +# docker build -t clickhouse/gdb . + +ARG FROM_TAG=latest +FROM clickhouse/fasttest:$FROM_TAG + +ENV CC=clang-${LLVM_VERSION} +ENV CXX=clang++-${LLVM_VERSION} +# ld from binutils is 2.38, which has the following error: +# +# DWARF error: invalid or unhandled FORM value: 0x23 +# +ENV LD=ld.lld-${LLVM_VERSION} + +ARG GDB_VERSION=15.1 + +# gdb dependencies +RUN apt-get update \ + && apt-get install --yes \ + libgmp-dev \ + libmpfr-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* + +RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \ + && tar -xvf gdb-$GDB_VERSION.tar.gz \ + && cd gdb-$GDB_VERSION \ + && ./configure --prefix=/usr \ + && make -j $(nproc) \ + && make install \ + && rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz From 459dd1ff2db90f85d9c1f72329ff2f31117c13a7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 14 Jul 2024 16:35:49 +0200 Subject: [PATCH 014/132] Add a comment of the image name for cctools Signed-off-by: Azat Khuzhin --- docker/packager/cctools/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile index d986c6a3c86..56a4a65bcc9 100644 --- a/docker/packager/cctools/Dockerfile +++ b/docker/packager/cctools/Dockerfile @@ -1,3 +1,5 @@ +# docker build -t clickhouse/cctools . + # This is a hack to significantly reduce the build time of the clickhouse/binary-builder # It's based on the assumption that we don't care of the cctools version so much # It event does not depend on the clickhouse/fasttest in the `docker/images.json` From 16fcd5b825b50ba88de8ef42b37d40eac1af5596 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 14 Jul 2024 15:48:12 +0200 Subject: [PATCH 015/132] Remove .gdb-index (do not give any benefit for gdb 15.1) Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f796e6c4616..f3c6b2abb30 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,14 +187,6 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () -if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - # Can be lld or ld-lld or lld-13 or /path/to/lld. - if (LINKER_NAME MATCHES "lld") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index") - message (STATUS "Adding .gdb-index via --gdb-index linker option.") - endif () -endif() - if (NOT (SANITIZE_COVERAGE OR WITH_COVERAGE) AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" From 424f19d6c0d003d2a9e2c03017c7fa7dc9dd9233 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 5 Aug 2024 22:49:27 +0200 Subject: [PATCH 016/132] Move gdb into cctools Signed-off-by: Azat Khuzhin --- docker/images.json | 4 ---- docker/packager/cctools/Dockerfile | 24 ++++++++++++++++++++++++ docker/packager/gdb/Dockerfile | 30 ------------------------------ 3 files changed, 24 insertions(+), 34 deletions(-) delete mode 100644 docker/packager/gdb/Dockerfile diff --git a/docker/images.json b/docker/images.json index eae4ad30a79..716b76ee217 100644 --- a/docker/images.json +++ b/docker/images.json @@ -7,10 +7,6 @@ "name": "clickhouse/cctools", "dependent": [] }, - "docker/packager/gdb": { - "name": "clickhouse/gdb", - "dependent": [] - }, "docker/test/compatibility/centos": { "name": "clickhouse/test-old-centos", "dependent": [] diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile index 56a4a65bcc9..570a42d42d5 100644 --- a/docker/packager/cctools/Dockerfile +++ b/docker/packager/cctools/Dockerfile @@ -32,5 +32,29 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \ && cd ../.. \ && rm -rf cctools-port +# +# GDB +# +# ld from binutils is 2.38, which has the following error: +# +# DWARF error: invalid or unhandled FORM value: 0x23 +# +ENV LD=ld.lld-${LLVM_VERSION} +ARG GDB_VERSION=15.1 +RUN apt-get update \ + && apt-get install --yes \ + libgmp-dev \ + libmpfr-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \ + && tar -xvf gdb-$GDB_VERSION.tar.gz \ + && cd gdb-$GDB_VERSION \ + && ./configure --prefix=/opt/gdb \ + && make -j $(nproc) \ + && make install \ + && rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz + FROM scratch COPY --from=builder /cctools /cctools +COPY --from=builder /opt/gdb /opt/gdb diff --git a/docker/packager/gdb/Dockerfile b/docker/packager/gdb/Dockerfile deleted file mode 100644 index 1b5c3bbcb2e..00000000000 --- a/docker/packager/gdb/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -# docker build -t clickhouse/gdb . - -ARG FROM_TAG=latest -FROM clickhouse/fasttest:$FROM_TAG - -ENV CC=clang-${LLVM_VERSION} -ENV CXX=clang++-${LLVM_VERSION} -# ld from binutils is 2.38, which has the following error: -# -# DWARF error: invalid or unhandled FORM value: 0x23 -# -ENV LD=ld.lld-${LLVM_VERSION} - -ARG GDB_VERSION=15.1 - -# gdb dependencies -RUN apt-get update \ - && apt-get install --yes \ - libgmp-dev \ - libmpfr-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* - -RUN wget https://sourceware.org/pub/gdb/releases/gdb-$GDB_VERSION.tar.gz \ - && tar -xvf gdb-$GDB_VERSION.tar.gz \ - && cd gdb-$GDB_VERSION \ - && ./configure --prefix=/usr \ - && make -j $(nproc) \ - && make install \ - && rm -fr gdb-$GDB_VERSION gdb-$GDB_VERSION.tar.gz From 815fdc43ac333a75adff646fef073ea591494d13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 6 Aug 2024 14:36:02 +0000 Subject: [PATCH 017/132] Revert "Merge pull request #67800 from ClickHouse/revert-66510" This reverts commit 45c4a71ccb62bac6728d0e583fd04c0fc4f45a6f, reversing changes made to bb71c1eea8e6019a5a21b6add08c2244764ddea5. --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 21 ++++++++++++------- src/Storages/VirtualColumnUtils.h | 10 ++++++++- ..._with_non_deterministic_function.reference | 2 ++ ..._count_with_non_deterministic_function.sql | 4 ++++ 5 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference create mode 100644 tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 49888596fbb..ce27ad24e10 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1146,7 +1146,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); if (!filter_dag) return {}; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ba1f4488005..90c2c7f93c1 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -275,7 +275,8 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes) + ActionsDAG::Nodes & additional_nodes, + bool allow_non_deterministic_functions) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -284,8 +285,14 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) node_copy.children.push_back(child_copy); + /// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for + /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is + /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply + /// trivial count. + else if (!allow_non_deterministic_functions) + return nullptr; if (node_copy.children.empty()) return nullptr; @@ -311,7 +318,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child) return nullptr; return &node_copy; @@ -325,7 +332,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions().clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag.getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -359,13 +366,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions) { if (!predicate) return {}; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions); if (!res) return {}; @@ -374,7 +381,7 @@ std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); if (dag) filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index d75dc70ae44..abf46dc23a4 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -41,7 +41,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic +/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr. +/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in +/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is +/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` +/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is +/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial +/// count optimization will be mistakenly applied to the query. +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true); /// Extract from the input stream a set of `name` column values template diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql new file mode 100644 index 00000000000..bb3269da597 --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql @@ -0,0 +1,4 @@ +CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; +INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; From 2074485083e8860aafc36ac7886a54a75e144468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 6 Aug 2024 14:25:02 +0000 Subject: [PATCH 018/132] Fix partial filtering in `filterBlockWithPredicate` --- src/Storages/MergeTree/MergeTreeData.cpp | 3 +- src/Storages/VirtualColumnUtils.cpp | 86 +++++++++---------- src/Storages/VirtualColumnUtils.h | 16 ++-- ...03217_read_rows_in_system_tables.reference | 10 +++ .../03217_read_rows_in_system_tables.sql | 34 ++++++++ 5 files changed, 97 insertions(+), 52 deletions(-) create mode 100644 tests/queries/0_stateless/03217_read_rows_in_system_tables.reference create mode 100644 tests/queries/0_stateless/03217_read_rows_in_system_tables.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ce27ad24e10..b24d7968b61 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6914,7 +6914,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const auto * predicate = filter_dag->getOutputs().at(0); // Generate valid expressions for filtering - VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context); + VirtualColumnUtils::filterBlockWithPredicate( + predicate, virtual_columns_block, query_context, /*allow_filtering_with_partial_predicate =*/true); rows = virtual_columns_block.rows(); part_name_column = virtual_columns_block.getByName("_part").column; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 90c2c7f93c1..b40378250bb 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,51 +1,46 @@ -#include +#include + #include #include +#include +#include +#include +#include +#include #include #include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include +#include #include +#include #include - -#include #include -#include #include +#include +#include #include #include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include +#include +#include #include #include +#include #include -#include #include - -#include -#include #include -#include "Functions/FunctionsLogical.h" -#include "Functions/IFunction.h" -#include "Functions/IFunctionAdaptors.h" -#include "Functions/indexHint.h" -#include -#include -#include -#include namespace DB @@ -273,10 +268,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) } static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( - const ActionsDAG::Node * node, - const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes, - bool allow_non_deterministic_functions) + const ActionsDAG::Node * node, const Block * allowed_inputs, ActionsDAG::Nodes & additional_nodes, bool allow_partial_result) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -285,13 +277,14 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) + if (const auto * child_copy + = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result)) node_copy.children.push_back(child_copy); - /// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for + /// Expression like (now_allowed AND allowed) is not allowed if allow_partial_result = true. This is important for /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply /// trivial count. - else if (!allow_non_deterministic_functions) + else if (!allow_partial_result) return nullptr; if (node_copy.children.empty()) @@ -300,7 +293,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( if (node_copy.children.size() == 1) { const ActionsDAG::Node * res = node_copy.children.front(); - /// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires + /// Expression like (not_allowed AND 256) can't be reduced to (and(256)) because AND requires /// at least two arguments; also it can't be reduced to (256) because result type is different. if (!res->result_type->equals(*node->result_type)) { @@ -318,7 +311,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result); !child) return nullptr; return &node_copy; @@ -332,7 +325,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions().clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag.getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) + if (const auto * child_copy + = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_partial_result)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -366,22 +360,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions) +std::optional +splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result) { if (!predicate) return {}; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_partial_result); if (!res) return {}; return ActionsDAG::cloneSubDAG({res}, true); } -void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) +void filterBlockWithPredicate( + const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_partial_result=*/allow_filtering_with_partial_predicate); if (dag) filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index abf46dc23a4..f76cf2cad76 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -26,9 +26,13 @@ namespace VirtualColumnUtils /// /// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed" /// if there are subqueries. +/// +/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...)))./// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate. +/// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs). +/// If allow_filtering_with_partial_predicate is true, then the filtering will be done even if some part of the predicate +/// cannot be evaluated using the columns from the block. +void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate = true); -/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))). -void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); /// Just filters block. Block should contain all the required columns. ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context); @@ -41,15 +45,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic -/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr. -/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in +/// When allow_partial_result is false, then the result will be empty if any part of if cannot be evaluated deterministically +/// on the given inputs. +/// allow_partial_result must be false when we are going to use the result to filter parts in /// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is /// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` /// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is /// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial /// count optimization will be mistakenly applied to the query. -std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true); +std::optional splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result = true); /// Extract from the input stream a set of `name` column values template diff --git a/tests/queries/0_stateless/03217_read_rows_in_system_tables.reference b/tests/queries/0_stateless/03217_read_rows_in_system_tables.reference new file mode 100644 index 00000000000..b21ead49b1e --- /dev/null +++ b/tests/queries/0_stateless/03217_read_rows_in_system_tables.reference @@ -0,0 +1,10 @@ +information_schema tables +default test_replica_1 r1 +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + ReadFromMerge + Filter (( + ( + ))) + ReadFromMergeTree (default.test_replica_1) +1 1 +1 1 diff --git a/tests/queries/0_stateless/03217_read_rows_in_system_tables.sql b/tests/queries/0_stateless/03217_read_rows_in_system_tables.sql new file mode 100644 index 00000000000..3bea04ccccf --- /dev/null +++ b/tests/queries/0_stateless/03217_read_rows_in_system_tables.sql @@ -0,0 +1,34 @@ +SELECT database, table FROM system.tables WHERE database = 'information_schema' AND table = 'tables'; + +-- To verify StorageSystemReplicas applies the filter properly +CREATE TABLE test_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217/test_replica', 'r1') + ORDER BY x; +CREATE TABLE test_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217/test_replica', 'r2') + ORDER BY x; + +SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = 'test_replica_1' AND replica_name = 'r1'; + + +-- To verify StorageMerge +CREATE TABLE all_replicas (x UInt32) + ENGINE = Merge(currentDatabase(), 'test_replica_*'); + +INSERT INTO test_replica_1 SELECT number AS x FROM numbers(10); +SYSTEM SYNC REPLICA test_replica_2; +-- If the filter not applied, then the plan will show both replicas +EXPLAIN SELECT _table, count() FROM all_replicas WHERE _table = 'test_replica_1' AND x >= 0 GROUP BY _table; + +SYSTEM FLUSH LOGS; +-- argMin-argMax make the test repeatable + +-- StorageSystemTables +SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';' + AND type = 'QueryFinish'; + +-- StorageSystemReplicas +SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND query LIKE '%SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = \'test_replica_1\' AND replica_name = \'r1\';' + AND type = 'QueryFinish'; From 86e3b35f24a449b63172015a0f768434f9f203c6 Mon Sep 17 00:00:00 2001 From: Jacob Reckhard Date: Tue, 6 Aug 2024 09:07:17 -0600 Subject: [PATCH 019/132] spelling fixes --- src/DataTypes/DataTypeCustomGeo.cpp | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeCustomGeo.cpp b/src/DataTypes/DataTypeCustomGeo.cpp index d72787647c3..f90788ec403 100644 --- a/src/DataTypes/DataTypeCustomGeo.cpp +++ b/src/DataTypes/DataTypeCustomGeo.cpp @@ -24,7 +24,7 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory) std::make_unique(std::make_unique())); }); - // Custom type for mulitple lines stored as Array(LineString) + // Custom type for multiple lines stored as Array(LineString) factory.registerSimpleDataTypeCustom("MultiLineString", [] { return std::make_pair(DataTypeFactory::instance().get("Array(LineString)"), diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 3d7e77f213d..71f5efca893 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2086,6 +2086,7 @@ multiSearchFirstPositionUTF multibyte multidirectory multiline +multilinestring multiplyDecimal multipolygon multisearchany From 5390d1b108956907bf4b038a56fdb2ed8e584308 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 6 Aug 2024 16:12:44 +0200 Subject: [PATCH 020/132] docker: use self-compiled gdb 15 from cctools Signed-off-by: Azat Khuzhin --- docker/packager/binary-builder/Dockerfile | 5 ----- docker/test/fasttest/Dockerfile | 2 +- docker/test/integration/base/Dockerfile | 4 +++- docker/test/integration/runner/Dockerfile | 3 ++- docker/test/performance-comparison/Dockerfile | 4 +++- docker/test/util/Dockerfile | 4 +++- 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index 647ab8758a5..7d6acdcd856 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -6,11 +6,6 @@ ENV CXX=clang++-${LLVM_VERSION} # If the cctools is updated, then first build it in the CI, then update here in a different commit COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools -# TODO: same for gdb and in other places as well -# -# NOTE: here it will add circular dependency but it will be fixed after [1] -# -# [1]: https://github.com/ClickHouse/ClickHouse/issues/66493 # Rust toolchain and libraries ENV RUSTUP_HOME=/rust/rustup diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 4cac2ee6135..5d311c673a4 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -83,7 +83,7 @@ RUN arch=${TARGETARCH:-amd64} \ # Give suid to gdb to grant it attach permissions # chmod 777 to make the container user independent -RUN chmod u+s /usr/bin/gdb \ +RUN chmod u+s /opt/gdb/bin/gdb \ && mkdir -p /var/lib/clickhouse \ && chmod 777 /var/lib/clickhouse diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 469251f648c..dc4d470a262 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -11,7 +11,6 @@ RUN apt-get update \ curl \ default-jre \ g++ \ - gdb \ iproute2 \ krb5-user \ libicu-dev \ @@ -73,3 +72,6 @@ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg && \ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index d250b746e7d..d62009f1be3 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -30,7 +30,6 @@ RUN apt-get update \ luajit \ libssl-dev \ libcurl4-openssl-dev \ - gdb \ default-jdk \ software-properties-common \ libkrb5-dev \ @@ -87,6 +86,8 @@ COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ COPY misc/ /misc/ +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" # Same options as in test/base/Dockerfile # (in case you need to override them in tests) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index c68a39f6f70..f7139275282 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -9,7 +9,6 @@ RUN apt-get update \ curl \ dmidecode \ g++ \ - gdb \ git \ gnuplot \ imagemagick \ @@ -42,6 +41,9 @@ RUN pip3 --no-cache-dir install -r requirements.txt COPY run.sh / +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" + CMD ["bash", "/run.sh"] # docker run --network=host --volume :/workspace --volume=:/output -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/performance-comparison diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index dc928ba7195..8b949ed95db 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -44,7 +44,6 @@ RUN apt-get update \ bash \ bsdmainutils \ build-essential \ - gdb \ git \ gperf \ moreutils \ @@ -58,3 +57,6 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* COPY process_functional_tests_result.py / + +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +ENV PATH="/opt/gdb/bin:${PATH}" From deb58b4ede1cabff8e4490fb39b2627ee94a347a Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 3 Jul 2024 18:23:38 +0800 Subject: [PATCH 021/132] any/anti/semi join support mixed join conditions --- src/Interpreters/HashJoin/HashJoin.cpp | 61 +++-- src/Interpreters/HashJoin/HashJoinMethods.h | 139 ++++++++---- src/Interpreters/HashJoin/JoinFeatures.h | 7 +- src/Interpreters/HashJoin/JoinUsedFlags.h | 33 ++- src/Interpreters/HashJoin/LeftHashJoin.cpp | 3 + src/Interpreters/joinDispatch.h | 108 +++++---- src/Storages/StorageJoin.cpp | 6 +- ..._join_on_inequal_expression_fast.reference | 214 ++++++++++++++++++ ...006_join_on_inequal_expression_fast.sql.j2 | 11 + 9 files changed, 469 insertions(+), 113 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 0c7cad4360d..769cb574ed7 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -291,12 +291,13 @@ void HashJoin::dataMapInit(MapsVariant & map) { if (kind == JoinKind::Cross) return; - joinDispatchInit(kind, strictness, map); - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); }); + auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; + joinDispatchInit(kind, strictness, map, prefer_use_maps_all); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { map_.create(data->type); }); if (reserve_num) { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); } if (!data) @@ -327,9 +328,10 @@ size_t HashJoin::getTotalRowCount() const } else { + auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; for (const auto & map : data->maps) { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { res += map_.getTotalRowCount(data->type); }); } } @@ -367,9 +369,10 @@ size_t HashJoin::getTotalByteCount() const if (data->type != Type::CROSS) { + auto prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; for (const auto & map : data->maps) { - joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); + joinDispatch(kind, strictness, map, prefer_use_maps_all, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); } } return res; @@ -520,6 +523,8 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) return true; } + bool prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; + size_t total_rows = 0; size_t total_bytes = 0; { @@ -592,7 +597,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) bool is_inserted = false; if (kind != JoinKind::Cross) { - joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map) + joinDispatch(kind, strictness, data->maps[onexpr_idx], prefer_use_maps_all, [&](auto kind_, auto strictness_, auto & map) { size_t size = HashJoinMethods>::insertFromBlockImpl( *this, @@ -608,10 +613,10 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) is_inserted); if (flag_per_row) - used_flags->reinit(stored_block); + used_flags->reinit, MapsAll>>(stored_block); else if (is_inserted) /// Number of buckets + 1 value from zero storage - used_flags->reinit(size + 1); + used_flags->reinit, MapsAll>>(size + 1); }); } @@ -869,7 +874,7 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block keys.insert(std::move(key)); } - static_assert(!MapGetter::flagged, + static_assert(!MapGetter::flagged, "joinGet are not protected from hash table changes between block processing"); std::vector maps_vector; @@ -910,16 +915,34 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) materializeBlockInplace(block); } + bool prefer_use_maps_all = table_join->getMixedJoinExpression() != nullptr; { std::vectormaps[0])> * > maps_vector; for (size_t i = 0; i < table_join->getClauses().size(); ++i) maps_vector.push_back(&data->maps[i]); - if (joinDispatch(kind, strictness, maps_vector, [&](auto kind_, auto strictness_, auto & maps_vector_) + if (joinDispatch(kind, strictness, maps_vector, prefer_use_maps_all, [&](auto kind_, auto strictness_, auto & maps_vector_) { - using MapType = typename MapGetter::Map; - Block remaining_block = HashJoinMethods::joinBlockImpl( - *this, block, sample_block_with_columns_to_add, maps_vector_); + Block remaining_block; + if constexpr (std::is_same_v, std::vector>) + { + remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + } + else if constexpr (std::is_same_v, std::vector>) + { + remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + } + else if constexpr (std::is_same_v, std::vector>) + { + remaining_block = HashJoinMethods::joinBlockImpl( + *this, block, sample_block_with_columns_to_add, maps_vector_); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown maps type"); + } if (remaining_block.rows()) not_processed = std::make_shared(ExtraBlock{std::move(remaining_block)}); else @@ -1019,7 +1042,8 @@ public: rows_added = fillColumnsFromMap(map, columns_right); }; - if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), fill_callback)) + bool prefer_use_maps_all = parent.table_join->getMixedJoinExpression() != nullptr; + if (!joinDispatch(parent.kind, parent.strictness, parent.data->maps.front(), prefer_use_maps_all, fill_callback)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); } @@ -1216,11 +1240,12 @@ void HashJoin::reuseJoinedData(const HashJoin & join) if (flag_per_row) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); + bool prefer_use_maps_all = join.table_join->getMixedJoinExpression() != nullptr; for (auto & map : data->maps) { - joinDispatch(kind, strictness, map, [this](auto kind_, auto strictness_, auto & map_) + joinDispatch(kind, strictness, map, prefer_use_maps_all, [this](auto kind_, auto strictness_, auto & map_) { - used_flags->reinit(map_.getBufferSizeInCells(data->type) + 1); + used_flags->reinit, MapsAll>>(map_.getBufferSizeInCells(data->type) + 1); }); } } @@ -1300,7 +1325,9 @@ void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additiona additional_filter_expression->dumpActions()); } - bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)); + bool is_supported = ((strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind))) + || ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Any || strictness == JoinStrictness::Anti) + && (isLeft(kind) || isRight(kind))); if (!is_supported) { throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 0dfafa94efc..36785c08845 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -112,7 +112,7 @@ public: const MapsTemplateVector & maps_, bool is_join_get = false) { - constexpr JoinFeatures join_features; + constexpr JoinFeatures join_features; std::vector join_on_keys; const auto & onexprs = join.table_join->getClauses(); @@ -358,22 +358,20 @@ private: AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags) { - constexpr JoinFeatures join_features; - if constexpr (join_features.is_all_join) + constexpr JoinFeatures join_features; + if constexpr (join_features.is_maps_all) { - if (added_columns.additional_filter_expression) - { - bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; - return joinRightColumnsWithAddtitionalFilter( - std::forward>(key_getter_vector), - mapv, - added_columns, - used_flags, - need_filter, - join_features.need_flags, - join_features.add_missing, - mark_per_row_used); - } + if (added_columns.additional_filter_expression) + { + bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; + return joinRightColumnsWithAddtitionalFilter( + std::forward>(key_getter_vector), + mapv, + added_columns, + used_flags, + need_filter, + mark_per_row_used); + } } if (added_columns.additional_filter_expression) @@ -394,7 +392,7 @@ private: AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags) { - constexpr JoinFeatures join_features; + constexpr JoinFeatures join_features; size_t rows = added_columns.rows_to_add; if constexpr (need_filter) @@ -474,7 +472,7 @@ private: mapped, added_columns, current_offset, known_rows, used_flags_opt); } } - else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) + else if constexpr (join_features.is_any_join && join_features.inner) { bool used_once = used_flags.template setUsedOnce(find_result); @@ -655,24 +653,23 @@ private: } /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. - template + template static size_t joinRightColumnsWithAddtitionalFilter( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], bool need_filter [[maybe_unused]], - bool need_flags [[maybe_unused]], - bool add_missing [[maybe_unused]], bool flag_per_row [[maybe_unused]]) { + constexpr JoinFeatures join_features; size_t left_block_rows = added_columns.rows_to_add; if (need_filter) added_columns.filter = IColumn::Filter(left_block_rows, 0); std::unique_ptr pool; - if constexpr (need_replication) + if constexpr (join_features.need_replication) added_columns.offsets_to_replicate = std::make_unique(left_block_rows); std::vector row_replicate_offset; @@ -699,7 +696,7 @@ private: selected_rows.clear(); for (; left_row_iter < left_block_rows; ++left_row_iter) { - if constexpr (need_replication) + if constexpr (join_features.need_replication) { if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) { @@ -743,7 +740,7 @@ private: for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) { bool any_matched = false; - /// For all right join, flag_per_row is true, we need mark used flags for each row. + /// For right join, flag_per_row is true, we need mark used flags for each row. if (flag_per_row) { for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) @@ -751,10 +748,26 @@ private: if (filter_flags[replicated_row]) { any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; - if (need_flags) - used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + if constexpr (join_features.is_semi_join || join_features.is_any_join) + { + auto used_once = used_flags.template setUsedOnce(selected_right_row_it->block, selected_right_row_it->row_num, 0); + if (used_once) + { + total_added_rows += 1; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + } + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + } + else + { + total_added_rows += 1; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + } } ++selected_right_row_it; } @@ -763,34 +776,66 @@ private: { for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) { - if (filter_flags[replicated_row]) + if constexpr (join_features.is_anti_join) { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); - total_added_rows += 1; + any_matched |= filter_flags[replicated_row]; + } + else if constexpr (join_features.need_replication) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + else + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + total_added_rows += 1; + selected_right_row_it = selected_right_row_it + row_replicate_offset[i] - replicated_row; + break; + } + else + ++selected_right_row_it; } - ++selected_right_row_it; } } - if (!any_matched) + + + if constexpr (join_features.is_anti_join) { - if (add_missing) - addNotFoundRow(added_columns, total_added_rows); - else - addNotFoundRow(added_columns, total_added_rows); + if (!any_matched) + { + if constexpr (join_features.left) + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + addNotFoundRow(added_columns, total_added_rows); + } } else { - if (!flag_per_row && need_flags) - used_flags.template setUsed(find_results[find_result_index]); - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - if (add_missing) - added_columns.applyLazyDefaults(); + if (!any_matched) + { + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if constexpr (join_features.add_missing) + added_columns.applyLazyDefaults(); + } } find_result_index += (prev_replicated_row != row_replicate_offset[i]); - if constexpr (need_replication) + if constexpr (join_features.need_replication) { (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; } @@ -817,7 +862,7 @@ private: auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); copy_final_matched_rows(left_start_row, filter_col); - if constexpr (need_replication) + if constexpr (join_features.need_replication) { // Add a check for current_added_rows to avoid run the filter expression on too small size batch. if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) @@ -825,7 +870,7 @@ private: } } - if constexpr (need_replication) + if constexpr (join_features.need_replication) { added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); added_columns.filter.resize_assume_reserved(left_row_iter); diff --git a/src/Interpreters/HashJoin/JoinFeatures.h b/src/Interpreters/HashJoin/JoinFeatures.h index 2f2bd1e29a2..a530179f0b4 100644 --- a/src/Interpreters/HashJoin/JoinFeatures.h +++ b/src/Interpreters/HashJoin/JoinFeatures.h @@ -3,15 +3,15 @@ #include namespace DB { -template +template struct JoinFeatures { static constexpr bool is_any_join = STRICTNESS == JoinStrictness::Any; - static constexpr bool is_any_or_semi_join = STRICTNESS == JoinStrictness::Any || STRICTNESS == JoinStrictness::RightAny || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Left); static constexpr bool is_all_join = STRICTNESS == JoinStrictness::All; static constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; static constexpr bool is_semi_join = STRICTNESS == JoinStrictness::Semi; static constexpr bool is_anti_join = STRICTNESS == JoinStrictness::Anti; + static constexpr bool is_any_or_semi_join = is_any_join || STRICTNESS == JoinStrictness::RightAny || (is_semi_join && KIND == JoinKind::Left); static constexpr bool left = KIND == JoinKind::Left; static constexpr bool right = KIND == JoinKind::Right; @@ -22,7 +22,8 @@ struct JoinFeatures static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); static constexpr bool add_missing = (left || full) && !is_semi_join; - static constexpr bool need_flags = MapGetter::flagged; + static constexpr bool need_flags = MapGetter, HashJoin::MapsOne>>::flagged; + static constexpr bool is_maps_all = std::is_same_v, HashJoin::MapsAll>; }; } diff --git a/src/Interpreters/HashJoin/JoinUsedFlags.h b/src/Interpreters/HashJoin/JoinUsedFlags.h index bd41ba2073f..c84c6ec3fea 100644 --- a/src/Interpreters/HashJoin/JoinUsedFlags.h +++ b/src/Interpreters/HashJoin/JoinUsedFlags.h @@ -26,10 +26,10 @@ public: /// Update size for vector with flags. /// Calling this method invalidates existing flags. /// It can be called several times, but all of them should happen before using this structure. - template + template void reinit(size_t size) { - if constexpr (MapGetter::flagged) + if constexpr (MapGetter::flagged) { assert(flags[nullptr].size() <= size); need_flags = true; @@ -43,10 +43,10 @@ public: } } - template + template void reinit(const Block * block_ptr) { - if constexpr (MapGetter::flagged) + if constexpr (MapGetter::flagged) { assert(flags[block_ptr].size() <= block_ptr->rows()); need_flags = true; @@ -148,6 +148,31 @@ public: } } + template + bool setUsedOnce(const Block * block, size_t row_num, size_t offset) + { + if constexpr (!use_flags) + return true; + + if constexpr (flag_per_row) + { + /// fast check to prevent heavy CAS with seq_cst order + if (flags[block][row_num].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[block][row_num].compare_exchange_strong(expected, true); + } + else + { + /// fast check to prevent heavy CAS with seq_cst order + if (flags[nullptr][offset].load(std::memory_order_relaxed)) + return false; + + bool expected = false; + return flags[nullptr][offset].compare_exchange_strong(expected, true); + } + } }; } diff --git a/src/Interpreters/HashJoin/LeftHashJoin.cpp b/src/Interpreters/HashJoin/LeftHashJoin.cpp index 69e17ff70bd..a53ffaac0b5 100644 --- a/src/Interpreters/HashJoin/LeftHashJoin.cpp +++ b/src/Interpreters/HashJoin/LeftHashJoin.cpp @@ -4,8 +4,11 @@ namespace DB { template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; } diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h index 54c5c7dc83a..982c56e8210 100644 --- a/src/Interpreters/joinDispatch.h +++ b/src/Interpreters/joinDispatch.h @@ -12,38 +12,41 @@ namespace DB { -template +template struct MapGetter; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; /// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation. -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; /// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation. -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; -template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; +template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template -struct MapGetter { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; }; +template +struct MapGetter { using Map = HashJoin::MapsAsof; static constexpr bool flagged = false; }; static constexpr std::array STRICTNESSES = { JoinStrictness::RightAny, @@ -62,7 +65,7 @@ static constexpr std::array KINDS = { }; /// Init specified join map -inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin::MapsVariant & maps) +inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin::MapsVariant & maps, bool prefer_use_maps_all = false) { return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij) { @@ -70,7 +73,10 @@ inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin: constexpr auto j = ij % STRICTNESSES.size(); if (kind == KINDS[i] && strictness == STRICTNESSES[j]) { - maps = typename MapGetter::Map(); + if (prefer_use_maps_all) + maps = typename MapGetter::Map(); + else + maps = typename MapGetter::Map(); return true; } return false; @@ -79,7 +85,7 @@ inline bool joinDispatchInit(JoinKind kind, JoinStrictness strictness, HashJoin: /// Call function on specified join map template -inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & maps, Func && func) +inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & maps, bool prefer_use_maps_all, Func && func) { return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij) { @@ -89,10 +95,16 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & constexpr auto j = ij % STRICTNESSES.size(); if (kind == KINDS[i] && strictness == STRICTNESSES[j]) { - func( - std::integral_constant(), - std::integral_constant(), - std::get::Map>(maps)); + if (prefer_use_maps_all) + func( + std::integral_constant(), + std::integral_constant(), + std::get::Map>(maps)); + else + func( + std::integral_constant(), + std::integral_constant(), + std::get::Map>(maps)); return true; } return false; @@ -101,7 +113,7 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, MapsVariant & /// Call function on specified join map template -inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector & mapsv, Func && func) +inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector & mapsv, bool prefer_use_maps_all, Func && func) { return static_for<0, KINDS.size() * STRICTNESSES.size()>([&](auto ij) { @@ -111,17 +123,31 @@ inline bool joinDispatch(JoinKind kind, JoinStrictness strictness, std::vector::Map; - std::vector v; - v.reserve(mapsv.size()); - for (const auto & el : mapsv) - v.push_back(&std::get(*el)); + if (prefer_use_maps_all) + { + using MapType = typename MapGetter::Map; + std::vector v; + v.reserve(mapsv.size()); + for (const auto & el : mapsv) + v.push_back(&std::get(*el)); - func( - std::integral_constant(), - std::integral_constant(), - v - /*std::get::Map>(maps)*/); + func( + std::integral_constant(), std::integral_constant(), v + /*std::get::Map>(maps)*/); + } + else + { + using MapType = typename MapGetter::Map; + std::vector v; + v.reserve(mapsv.size()); + for (const auto & el : mapsv) + v.push_back(&std::get(*el)); + + func( + std::integral_constant(), std::integral_constant(), v + /*std::get::Map>(maps)*/); + + } return true; } return false; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index a0d6cf11b64..974df940e91 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -504,7 +504,11 @@ protected: return {}; Chunk chunk; - if (!joinDispatch(join->kind, join->strictness, join->data->maps.front(), + if (!joinDispatch( + join->kind, + join->strictness, + join->data->maps.front(), + join->table_join->getMixedJoinExpression() != nullptr, [&](auto kind, auto strictness, auto & map) { chunk = createChunk(map); })) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness"); return chunk; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference index 46f24f73356..3e413afd98e 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -382,6 +382,220 @@ key1 e 5 5 5 key1 C 3 4 5 key2 a2 1 1 1 0 0 \N key4 f 2 3 4 key4 F 1 1 1 SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 d 4 7 2 key1 0 0 \N +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SELECT t1.*, t2.* FROM t1 RIGHT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 LEFT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 LEFT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 d 4 7 2 key1 0 0 \N +key1 e 5 5 5 key1 0 0 \N +key2 a2 1 1 1 key2 0 0 \N +key4 f 2 3 4 key4 0 0 \N +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SELECT t1.*, t2.* FROM t1 RIGHT ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT SEMI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT SEMI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT ANTI JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +SET join_algorithm='hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); key1 a 1 1 2 key1 A 1 2 1 key1 a 1 1 2 key1 B 2 1 2 diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 index 61ad5ec0bf1..1bf5a7870e7 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 @@ -22,6 +22,17 @@ SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND (t1.a=2 {% endfor -%} {% endfor -%} +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'RIGHT'] -%} +{% for join_strictness in ['ANY', 'SEMI', 'ANTI'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_strictness }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +{% endfor -%} +{% endfor -%} +{% endfor -%} {% for algorithm in ['hash'] -%} SET join_algorithm='{{ algorithm }}'; From 413834d04920072dd9a38b56902d799f71a476a3 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 4 Jul 2024 10:06:11 +0800 Subject: [PATCH 022/132] instance template classes --- src/Interpreters/HashJoin/HashJoinMethods.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 36785c08845..17c11b202ca 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -970,9 +970,12 @@ private: /// Instantiate template class ahead in different .cpp files to avoid `too large translation unit`. extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; From 377eed20fca56c74e4987fe4038fca6f0e019090 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 4 Jul 2024 15:05:24 +0800 Subject: [PATCH 023/132] reduce the size of HashJoin.cpp.o --- src/Interpreters/HashJoin/FullHashJoin.cpp | 2 +- src/Interpreters/HashJoin/HashJoinMethods.h | 834 +--------------- .../HashJoin/HashJoinMethodsImpl.h | 912 ++++++++++++++++++ src/Interpreters/HashJoin/InnerHashJoin.cpp | 2 +- src/Interpreters/HashJoin/LeftHashJoin.cpp | 2 +- src/Interpreters/HashJoin/RightHashJoin.cpp | 2 +- 6 files changed, 932 insertions(+), 822 deletions(-) create mode 100644 src/Interpreters/HashJoin/HashJoinMethodsImpl.h diff --git a/src/Interpreters/HashJoin/FullHashJoin.cpp b/src/Interpreters/HashJoin/FullHashJoin.cpp index 5d058d10fc2..4cdb2e757a4 100644 --- a/src/Interpreters/HashJoin/FullHashJoin.cpp +++ b/src/Interpreters/HashJoin/FullHashJoin.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 17c11b202ca..e3b8fbc1737 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -12,15 +12,8 @@ #include #include - namespace DB { -namespace ErrorCodes -{ - extern const int UNSUPPORTED_JOIN_KEYS; - extern const int LOGICAL_ERROR; -} - /// Inserting an element into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. template struct Inserter @@ -64,7 +57,6 @@ struct Inserter } }; - /// MapsTemplate is one of MapsOne, MapsAll and MapsAsof template class HashJoinMethods @@ -81,27 +73,7 @@ public: ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, - bool & is_inserted) - { - switch (type) - { - case HashJoin::Type::EMPTY: - [[fallthrough]]; - case HashJoin::Type::CROSS: - /// Do nothing. We will only save block, and it is enough - is_inserted = true; - return 0; - - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - return insertFromBlockImplTypeCase>::Type>(\ - join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ - break; - - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } - } + bool & is_inserted); using MapsTemplateVector = std::vector; @@ -110,278 +82,36 @@ public: Block & block, const Block & block_with_columns_to_add, const MapsTemplateVector & maps_, - bool is_join_get = false) - { - constexpr JoinFeatures join_features; - - std::vector join_on_keys; - const auto & onexprs = join.table_join->getClauses(); - for (size_t i = 0; i < onexprs.size(); ++i) - { - const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; - join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]); - } - size_t existing_columns = block.columns(); - - /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. - * Because if they are constants, then in the "not joined" rows, they may have different values - * - default values, which can differ from the values of these constants. - */ - if constexpr (join_features.right || join_features.full) - { - materializeBlockInplace(block); - } - - /** For LEFT/INNER JOIN, the saved blocks do not contain keys. - * For FULL/RIGHT JOIN, the saved blocks contain keys; - * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. - * For ASOF, the last column is used as the ASOF column - */ - AddedColumns added_columns( - block, - block_with_columns_to_add, - join.savedBlockSample(), - join, - std::move(join_on_keys), - join.table_join->getMixedJoinExpression(), - join_features.is_asof_join, - is_join_get); - - bool has_required_right_keys = (join.required_right_keys.columns() != 0); - added_columns.need_filter = join_features.need_filter || has_required_right_keys; - added_columns.max_joined_block_rows = join.max_joined_block_rows; - if (!added_columns.max_joined_block_rows) - added_columns.max_joined_block_rows = std::numeric_limits::max(); - else - added_columns.reserve(join_features.need_replication); - - size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags); - /// Do not hold memory for join_on_keys anymore - added_columns.join_on_keys.clear(); - Block remaining_block = sliceBlock(block, num_joined); - - added_columns.buildOutput(); - for (size_t i = 0; i < added_columns.size(); ++i) - block.insert(added_columns.moveColumn(i)); - - std::vector right_keys_to_replicate [[maybe_unused]]; - - if constexpr (join_features.need_filter) - { - /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. - for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); - - /// Add join key columns from right block if needed using value from left table because of equality - for (size_t i = 0; i < join.required_right_keys.columns(); ++i) - { - const auto & right_key = join.required_right_keys.getByPosition(i); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(join.required_right_keys_sources[i]); - const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); - block.insert(std::move(right_col)); - } - } - else if (has_required_right_keys) - { - /// Add join key columns from right block if needed. - for (size_t i = 0; i < join.required_right_keys.columns(); ++i) - { - const auto & right_key = join.required_right_keys.getByPosition(i); - auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); - /// asof column is already in block. - if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) - continue; - - const auto & left_column = block.getByName(join.required_right_keys_sources[i]); - auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); - block.insert(std::move(right_col)); - - if constexpr (join_features.need_replication) - right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); - } - } - - if constexpr (join_features.need_replication) - { - std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; - - /// If ALL ... JOIN - we replicate all the columns except the new ones. - for (size_t i = 0; i < existing_columns; ++i) - { - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); - } - - /// Replicate additional right keys - for (size_t pos : right_keys_to_replicate) - { - block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); - } - } - - return remaining_block; - } - + bool is_join_get = false); private: template - static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) - { - if constexpr (is_asof_join) - { - auto key_column_copy = key_columns; - auto key_size_copy = key_sizes; - key_column_copy.pop_back(); - key_size_copy.pop_back(); - return KeyGetter(key_column_copy, key_size_copy, nullptr); - } - else - return KeyGetter(key_columns, key_sizes, nullptr); - } + static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes); template - static size_t NO_INLINE insertFromBlockImplTypeCase( + static size_t insertFromBlockImplTypeCase( HashJoin & join, HashMap & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - - const IColumn * asof_column [[maybe_unused]] = nullptr; - if constexpr (is_asof_join) - asof_column = key_columns.back(); - - auto key_getter = createKeyGetter(key_columns, key_sizes); - - /// For ALL and ASOF join always insert values - is_inserted = !mapped_one || is_asof_join; - - for (size_t i = 0; i < rows; ++i) - { - if (null_map && (*null_map)[i]) - { - /// nulls are not inserted into hash table, - /// keep them for RIGHT and FULL joins - is_inserted = true; - continue; - } - - /// Check condition for right table from ON section - if (join_mask && !(*join_mask)[i]) - continue; - - if constexpr (is_asof_join) - Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); - else if constexpr (mapped_one) - is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); - else - Inserter::insertAll(join, map, key_getter, stored_block, i, pool); - } - return map.getBufferSizeInCells(); - } + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted); template static size_t switchJoinRightColumns( const std::vector & mapv, AddedColumns & added_columns, HashJoin::Type type, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; - switch (type) - { - case HashJoin::Type::EMPTY: { - if constexpr (!is_asof_join) - { - using KeyGetter = KeyGetterEmpty; - std::vector key_getter_vector; - key_getter_vector.emplace_back(); - - using MapTypeVal = typename KeyGetter::MappedType; - std::vector a_map_type_vector; - a_map_type_vector.emplace_back(); - return joinRightColumnsSwitchNullability( - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); - } - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); - } - #define M(TYPE) \ - case HashJoin::Type::TYPE: \ - { \ - using MapTypeVal = const typename std::remove_reference_t::element_type; \ - using KeyGetter = typename KeyGetterForType::Type; \ - std::vector a_map_type_vector(mapv.size()); \ - std::vector key_getter_vector; \ - for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ - { \ - const auto & join_on_key = added_columns.join_on_keys[d]; \ - a_map_type_vector[d] = mapv[d]->TYPE.get(); \ - key_getter_vector.push_back(std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ - } \ - return joinRightColumnsSwitchNullability( \ - std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ - } - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - - default: - throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); - } - } + JoinStuff::JoinUsedFlags & used_flags); template static size_t joinRightColumnsSwitchNullability( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - if (added_columns.need_filter) - { - return joinRightColumnsSwitchMultipleDisjuncts( - std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } - else - { - return joinRightColumnsSwitchMultipleDisjuncts( - std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } - } + JoinStuff::JoinUsedFlags & used_flags); template static size_t joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr JoinFeatures join_features; - if constexpr (join_features.is_maps_all) - { - if (added_columns.additional_filter_expression) - { - bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; - return joinRightColumnsWithAddtitionalFilter( - std::forward>(key_getter_vector), - mapv, - added_columns, - used_flags, - need_filter, - mark_per_row_used); - } - } - - if (added_columns.additional_filter_expression) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); - - return mapv.size() > 1 ? joinRightColumns( - std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns( - std::forward>(key_getter_vector), mapv, added_columns, used_flags); - } + JoinStuff::JoinUsedFlags & used_flags); /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). @@ -390,267 +120,17 @@ private: std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr JoinFeatures join_features; - - size_t rows = added_columns.rows_to_add; - if constexpr (need_filter) - added_columns.filter = IColumn::Filter(rows, 0); - - Arena pool; - - if constexpr (join_features.need_replication) - added_columns.offsets_to_replicate = std::make_unique(rows); - - IColumn::Offset current_offset = 0; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t i = 0; - for (; i < rows; ++i) - { - if constexpr (join_features.need_replication) - { - if (unlikely(current_offset >= max_joined_block_rows)) - { - added_columns.offsets_to_replicate->resize_assume_reserved(i); - added_columns.filter.resize_assume_reserved(i); - break; - } - } - - bool right_row_found = false; - - KnownRowsHolder known_rows; - for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) - { - const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(i); - using FindResult = typename KeyGetter::FindResult; - auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); - - if (find_result.isFound()) - { - right_row_found = true; - auto & mapped = find_result.getMapped(); - if constexpr (join_features.is_asof_join) - { - const IColumn & left_asof_key = added_columns.leftAsofKey(); - - auto row_ref = mapped->findAsof(left_asof_key, i); - if (row_ref.block) - { - setUsed(added_columns.filter, i); - if constexpr (flag_per_row) - used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); - else - used_flags.template setUsed(find_result); - - added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); - } - else - addNotFoundRow(added_columns, current_offset); - } - else if constexpr (join_features.is_all_join) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) - { - /// Use first appeared left key + it needs left columns replication - bool used_once = used_flags.template setUsedOnce(find_result); - if (used_once) - { - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - setUsed(added_columns.filter, i); - addFoundRowAll( - mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - } - else if constexpr (join_features.is_any_join && join_features.inner) - { - bool used_once = used_flags.template setUsedOnce(find_result); - - /// Use first appeared left key only - if (used_once) - { - setUsed(added_columns.filter, i); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - } - - break; - } - else if constexpr (join_features.is_any_join && join_features.full) - { - /// TODO - } - else if constexpr (join_features.is_anti_join) - { - if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(find_result); - } - else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); - - if (join_features.is_any_or_semi_join) - { - break; - } - } - } - } - - if (!right_row_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(added_columns.filter, i); - addNotFoundRow(added_columns, current_offset); - } - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - } - - added_columns.applyLazyDefaults(); - return i; - } + JoinStuff::JoinUsedFlags & used_flags); template - static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) - { - if constexpr (need_filter) - filter[pos] = 1; - } + static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]); template static ColumnPtr buildAdditionalFilter( size_t left_start_row, const std::vector & selected_rows, const std::vector & row_replicate_offset, - AddedColumns & added_columns) - { - ColumnPtr result_column; - do - { - if (selected_rows.empty()) - { - result_column = ColumnUInt8::create(); - break; - } - const Block & sample_right_block = *selected_rows.begin()->block; - if (!sample_right_block || !added_columns.additional_filter_expression) - { - auto filter = ColumnUInt8::create(); - filter->insertMany(1, selected_rows.size()); - result_column = std::move(filter); - break; - } - - auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); - if (required_cols.empty()) - { - Block block; - added_columns.additional_filter_expression->execute(block); - result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); - break; - } - NameSet required_column_names; - for (auto & col : required_cols) - required_column_names.insert(col.name); - - Block executed_block; - size_t right_col_pos = 0; - for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) - { - if (required_column_names.contains(col.name)) - { - auto new_col = col.column->cloneEmpty(); - for (const auto & selected_row : selected_rows) - { - const auto & src_col = selected_row.block->getByPosition(right_col_pos); - new_col->insertFrom(*src_col.column, selected_row.row_num); - } - executed_block.insert({std::move(new_col), col.type, col.name}); - } - right_col_pos += 1; - } - if (!executed_block) - { - result_column = ColumnUInt8::create(); - break; - } - - for (const auto & col_name : required_column_names) - { - const auto * src_col = added_columns.left_block.findByName(col_name); - if (!src_col) - continue; - auto new_col = src_col->column->cloneEmpty(); - size_t prev_left_offset = 0; - for (size_t i = 1; i < row_replicate_offset.size(); ++i) - { - const size_t & left_offset = row_replicate_offset[i]; - size_t rows = left_offset - prev_left_offset; - if (rows) - new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); - prev_left_offset = left_offset; - } - executed_block.insert({std::move(new_col), src_col->type, col_name}); - } - if (!executed_block) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", - required_cols.toString(), - sample_right_block.dumpNames(), - added_columns.left_block.dumpNames()); - } - - for (const auto & col : executed_block.getColumnsWithTypeAndName()) - if (!col.column || !col.type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); - - added_columns.additional_filter_expression->execute(executed_block); - result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); - executed_block.clear(); - } while (false); - - result_column = result_column->convertToFullIfNeeded(); - if (result_column->isNullable()) - { - /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros - /// Trying to avoid copying data, since we are the only owner of the column. - ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); - - MutableColumnPtr mutable_column; - { - ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); - result_column.reset(); - mutable_column = IColumn::mutate(std::move(nested_column)); - } - - auto & column_data = assert_cast(*mutable_column).getData(); - const auto & mask_column_data = assert_cast(*mask_column).getData(); - for (size_t i = 0; i < column_data.size(); ++i) - { - if (mask_column_data[i]) - column_data[i] = 0; - } - return mutable_column; - } - return result_column; - } + AddedColumns & added_columns); /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. template @@ -660,241 +140,10 @@ private: AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], bool need_filter [[maybe_unused]], - bool flag_per_row [[maybe_unused]]) - { - constexpr JoinFeatures join_features; - size_t left_block_rows = added_columns.rows_to_add; - if (need_filter) - added_columns.filter = IColumn::Filter(left_block_rows, 0); - - std::unique_ptr pool; - - if constexpr (join_features.need_replication) - added_columns.offsets_to_replicate = std::make_unique(left_block_rows); - - std::vector row_replicate_offset; - row_replicate_offset.reserve(left_block_rows); - - using FindResult = typename KeyGetter::FindResult; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t left_row_iter = 0; - PreSelectedRows selected_rows; - selected_rows.reserve(left_block_rows); - std::vector find_results; - find_results.reserve(left_block_rows); - bool exceeded_max_block_rows = false; - IColumn::Offset total_added_rows = 0; - IColumn::Offset current_added_rows = 0; - - auto collect_keys_matched_rows_refs = [&]() - { - pool = std::make_unique(); - find_results.clear(); - row_replicate_offset.clear(); - row_replicate_offset.push_back(0); - current_added_rows = 0; - selected_rows.clear(); - for (; left_row_iter < left_block_rows; ++left_row_iter) - { - if constexpr (join_features.need_replication) - { - if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) - { - break; - } - } - KnownRowsHolder all_flag_known_rows; - KnownRowsHolder single_flag_know_rows; - for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) - { - const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; - if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); - auto find_result = row_acceptable - ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) - : FindResult(); - - if (find_result.isFound()) - { - auto & mapped = find_result.getMapped(); - find_results.push_back(find_result); - if (flag_per_row) - addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); - else - addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); - } - } - row_replicate_offset.push_back(current_added_rows); - } - }; - - auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) - { - const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); - - size_t prev_replicated_row = 0; - auto selected_right_row_it = selected_rows.begin(); - size_t find_result_index = 0; - for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) - { - bool any_matched = false; - /// For right join, flag_per_row is true, we need mark used flags for each row. - if (flag_per_row) - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - if constexpr (join_features.is_semi_join || join_features.is_any_join) - { - auto used_once = used_flags.template setUsedOnce(selected_right_row_it->block, selected_right_row_it->row_num, 0); - if (used_once) - { - total_added_rows += 1; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); - } - } - else if constexpr (join_features.is_anti_join) - { - if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); - } - else - { - total_added_rows += 1; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); - used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); - } - } - ++selected_right_row_it; - } - } - else - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if constexpr (join_features.is_anti_join) - { - any_matched |= filter_flags[replicated_row]; - } - else if constexpr (join_features.need_replication) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); - total_added_rows += 1; - } - ++selected_right_row_it; - } - else - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); - total_added_rows += 1; - selected_right_row_it = selected_right_row_it + row_replicate_offset[i] - replicated_row; - break; - } - else - ++selected_right_row_it; - } - } - } - - - if constexpr (join_features.is_anti_join) - { - if (!any_matched) - { - if constexpr (join_features.left) - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - addNotFoundRow(added_columns, total_added_rows); - } - } - else - { - if (!any_matched) - { - addNotFoundRow(added_columns, total_added_rows); - } - else - { - if (!flag_per_row) - used_flags.template setUsed(find_results[find_result_index]); - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - if constexpr (join_features.add_missing) - added_columns.applyLazyDefaults(); - } - } - find_result_index += (prev_replicated_row != row_replicate_offset[i]); - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; - } - prev_replicated_row = row_replicate_offset[i]; - } - }; - - while (left_row_iter < left_block_rows && !exceeded_max_block_rows) - { - auto left_start_row = left_row_iter; - collect_keys_matched_rows_refs(); - if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " - "left_start_row: {}", - selected_rows.size(), - current_added_rows, - row_replicate_offset.size(), - left_row_iter, - left_start_row); - } - auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); - copy_final_matched_rows(left_start_row, filter_col); - - if constexpr (join_features.need_replication) - { - // Add a check for current_added_rows to avoid run the filter expression on too small size batch. - if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) - exceeded_max_block_rows = true; - } - } - - if constexpr (join_features.need_replication) - { - added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); - added_columns.filter.resize_assume_reserved(left_row_iter); - } - added_columns.applyLazyDefaults(); - return left_row_iter; - } + bool flag_per_row [[maybe_unused]]); /// Cut first num_rows rows from block in place and returns block with remaining rows - static Block sliceBlock(Block & block, size_t num_rows) - { - size_t total_rows = block.rows(); - if (num_rows >= total_rows) - return {}; - size_t remaining_rows = total_rows - num_rows; - Block remaining_block = block.cloneEmpty(); - for (size_t i = 0; i < block.columns(); ++i) - { - auto & col = block.getByPosition(i); - remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); - col.column = col.column->cut(0, num_rows); - } - return remaining_block; - } + static Block sliceBlock(Block & block, size_t num_rows); /** Since we do not store right key columns, * this function is used to copy left key columns to right key columns. @@ -909,62 +158,11 @@ private: const DataTypePtr & right_key_type, const String & renamed_right_column, const ColumnWithTypeAndName & left_column, - const IColumn::Filter * null_map_filter = nullptr) - { - ColumnWithTypeAndName right_column = left_column; - right_column.name = renamed_right_column; + const IColumn::Filter * null_map_filter = nullptr); - if (null_map_filter) - right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); + static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable); - bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); - if (null_map_filter) - correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); - else - correctNullabilityInplace(right_column, should_be_nullable); - - if (!right_column.type->equals(*right_key_type)) - { - right_column.column = castColumnAccurate(right_column, right_key_type); - right_column.type = right_key_type; - } - - right_column.column = right_column.column->convertToFullColumnIfConst(); - return right_column; - } - - static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) - { - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - } - else - { - /// We have to replace values masked by NULLs with defaults. - if (column.column) - if (const auto * nullable_column = checkAndGetColumn(&*column.column)) - column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); - - JoinCommon::removeColumnNullability(column); - } - } - - static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) - { - if (nullable) - { - JoinCommon::convertColumnToNullable(column); - if (column.type->isNullable() && !negative_null_map.empty()) - { - MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); - assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); - column.column = std::move(mutable_column); - } - } - else - JoinCommon::removeColumnNullability(column); - } + static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map); }; /// Instantiate template class ahead in different .cpp files to avoid `too large translation unit`. diff --git a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h new file mode 100644 index 00000000000..2bf5f6aef4a --- /dev/null +++ b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h @@ -0,0 +1,912 @@ +#pragma once +#include +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNSUPPORTED_JOIN_KEYS; + extern const int LOGICAL_ERROR; +} +template +size_t HashJoinMethods::insertFromBlockImpl( + HashJoin & join, + HashJoin::Type type, + MapsTemplate & maps, + size_t rows, + const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, + Block * stored_block, + ConstNullMapPtr null_map, + UInt8ColumnDataPtr join_mask, + Arena & pool, + bool & is_inserted) +{ + switch (type) + { + case HashJoin::Type::EMPTY: + [[fallthrough]]; + case HashJoin::Type::CROSS: + /// Do nothing. We will only save block, and it is enough + is_inserted = true; + return 0; + +#define M(TYPE) \ + case HashJoin::Type::TYPE: \ + return insertFromBlockImplTypeCase< \ + typename KeyGetterForType>::Type>( \ + join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ + break; + + APPLY_FOR_JOIN_VARIANTS(M) +#undef M + } +} + +template +Block HashJoinMethods::joinBlockImpl( + const HashJoin & join, Block & block, const Block & block_with_columns_to_add, const MapsTemplateVector & maps_, bool is_join_get) +{ + constexpr JoinFeatures join_features; + + std::vector join_on_keys; + const auto & onexprs = join.table_join->getClauses(); + for (size_t i = 0; i < onexprs.size(); ++i) + { + const auto & key_names = !is_join_get ? onexprs[i].key_names_left : onexprs[i].key_names_right; + join_on_keys.emplace_back(block, key_names, onexprs[i].condColumnNames().first, join.key_sizes[i]); + } + size_t existing_columns = block.columns(); + + /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. + * Because if they are constants, then in the "not joined" rows, they may have different values + * - default values, which can differ from the values of these constants. + */ + if constexpr (join_features.right || join_features.full) + { + materializeBlockInplace(block); + } + + /** For LEFT/INNER JOIN, the saved blocks do not contain keys. + * For FULL/RIGHT JOIN, the saved blocks contain keys; + * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. + * For ASOF, the last column is used as the ASOF column + */ + AddedColumns added_columns( + block, + block_with_columns_to_add, + join.savedBlockSample(), + join, + std::move(join_on_keys), + join.table_join->getMixedJoinExpression(), + join_features.is_asof_join, + is_join_get); + + bool has_required_right_keys = (join.required_right_keys.columns() != 0); + added_columns.need_filter = join_features.need_filter || has_required_right_keys; + added_columns.max_joined_block_rows = join.max_joined_block_rows; + if (!added_columns.max_joined_block_rows) + added_columns.max_joined_block_rows = std::numeric_limits::max(); + else + added_columns.reserve(join_features.need_replication); + + size_t num_joined = switchJoinRightColumns(maps_, added_columns, join.data->type, *join.used_flags); + /// Do not hold memory for join_on_keys anymore + added_columns.join_on_keys.clear(); + Block remaining_block = sliceBlock(block, num_joined); + + added_columns.buildOutput(); + for (size_t i = 0; i < added_columns.size(); ++i) + block.insert(added_columns.moveColumn(i)); + + std::vector right_keys_to_replicate [[maybe_unused]]; + + if constexpr (join_features.need_filter) + { + /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(added_columns.filter, -1); + + /// Add join key columns from right block if needed using value from left table because of equality + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + const auto & right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column); + block.insert(std::move(right_col)); + } + } + else if (has_required_right_keys) + { + /// Add join key columns from right block if needed. + for (size_t i = 0; i < join.required_right_keys.columns(); ++i) + { + const auto & right_key = join.required_right_keys.getByPosition(i); + auto right_col_name = join.getTableJoin().renamedRightColumnName(right_key.name); + /// asof column is already in block. + if (join_features.is_asof_join && right_key.name == join.table_join->getOnlyClause().key_names_right.back()) + continue; + + const auto & left_column = block.getByName(join.required_right_keys_sources[i]); + auto right_col = copyLeftKeyColumnToRight(right_key.type, right_col_name, left_column, &added_columns.filter); + block.insert(std::move(right_col)); + + if constexpr (join_features.need_replication) + right_keys_to_replicate.push_back(block.getPositionByName(right_col_name)); + } + } + + if constexpr (join_features.need_replication) + { + std::unique_ptr & offsets_to_replicate = added_columns.offsets_to_replicate; + + /// If ALL ... JOIN - we replicate all the columns except the new ones. + for (size_t i = 0; i < existing_columns; ++i) + { + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } + + /// Replicate additional right keys + for (size_t pos : right_keys_to_replicate) + { + block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); + } + } + + return remaining_block; +} + +template +template +KeyGetter HashJoinMethods::createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) +{ + if constexpr (is_asof_join) + { + auto key_column_copy = key_columns; + auto key_size_copy = key_sizes; + key_column_copy.pop_back(); + key_size_copy.pop_back(); + return KeyGetter(key_column_copy, key_size_copy, nullptr); + } + else + return KeyGetter(key_columns, key_sizes, nullptr); +} + +template +template +size_t HashJoinMethods::insertFromBlockImplTypeCase( + HashJoin & join, + HashMap & map, + size_t rows, + const ColumnRawPtrs & key_columns, + const Sizes & key_sizes, + Block * stored_block, + ConstNullMapPtr null_map, + UInt8ColumnDataPtr join_mask, + Arena & pool, + bool & is_inserted) +{ + [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + + const IColumn * asof_column [[maybe_unused]] = nullptr; + if constexpr (is_asof_join) + asof_column = key_columns.back(); + + auto key_getter = createKeyGetter(key_columns, key_sizes); + + /// For ALL and ASOF join always insert values + is_inserted = !mapped_one || is_asof_join; + + for (size_t i = 0; i < rows; ++i) + { + if (null_map && (*null_map)[i]) + { + /// nulls are not inserted into hash table, + /// keep them for RIGHT and FULL joins + is_inserted = true; + continue; + } + + /// Check condition for right table from ON section + if (join_mask && !(*join_mask)[i]) + continue; + + if constexpr (is_asof_join) + Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); + else if constexpr (mapped_one) + is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); + else + Inserter::insertAll(join, map, key_getter, stored_block, i, pool); + } + return map.getBufferSizeInCells(); +} + +template +template +size_t HashJoinMethods::switchJoinRightColumns( + const std::vector & mapv, + AddedColumns & added_columns, + HashJoin::Type type, + JoinStuff::JoinUsedFlags & used_flags) +{ + constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; + switch (type) + { + case HashJoin::Type::EMPTY: { + if constexpr (!is_asof_join) + { + using KeyGetter = KeyGetterEmpty; + std::vector key_getter_vector; + key_getter_vector.emplace_back(); + + using MapTypeVal = typename KeyGetter::MappedType; + std::vector a_map_type_vector; + a_map_type_vector.emplace_back(); + return joinRightColumnsSwitchNullability( + std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); + } + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys. Type: {}", type); + } +#define M(TYPE) \ + case HashJoin::Type::TYPE: { \ + using MapTypeVal = const typename std::remove_reference_t::element_type; \ + using KeyGetter = typename KeyGetterForType::Type; \ + std::vector a_map_type_vector(mapv.size()); \ + std::vector key_getter_vector; \ + for (size_t d = 0; d < added_columns.join_on_keys.size(); ++d) \ + { \ + const auto & join_on_key = added_columns.join_on_keys[d]; \ + a_map_type_vector[d] = mapv[d]->TYPE.get(); \ + key_getter_vector.push_back( \ + std::move(createKeyGetter(join_on_key.key_columns, join_on_key.key_sizes))); \ + } \ + return joinRightColumnsSwitchNullability(std::move(key_getter_vector), a_map_type_vector, added_columns, used_flags); \ + } + APPLY_FOR_JOIN_VARIANTS(M) +#undef M + + default: + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", type); + } +} + +template +template +size_t HashJoinMethods::joinRightColumnsSwitchNullability( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) +{ + if (added_columns.need_filter) + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } + else + { + return joinRightColumnsSwitchMultipleDisjuncts( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); + } +} + +template +template +size_t HashJoinMethods::joinRightColumnsSwitchMultipleDisjuncts( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) +{ + constexpr JoinFeatures join_features; + if constexpr (join_features.is_maps_all) + { + if (added_columns.additional_filter_expression) + { + bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; + return joinRightColumnsWithAddtitionalFilter( + std::forward>(key_getter_vector), mapv, added_columns, used_flags, need_filter, mark_per_row_used); + } + } + + if (added_columns.additional_filter_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); + + return mapv.size() > 1 ? joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns( + std::forward>(key_getter_vector), mapv, added_columns, used_flags); +} + + +/// Joins right table columns which indexes are present in right_indexes using specified map. +/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). +template +template +size_t HashJoinMethods::joinRightColumns( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags) +{ + constexpr JoinFeatures join_features; + + size_t rows = added_columns.rows_to_add; + if constexpr (need_filter) + added_columns.filter = IColumn::Filter(rows, 0); + + Arena pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t i = 0; + for (; i < rows; ++i) + { + if constexpr (join_features.need_replication) + { + if (unlikely(current_offset >= max_joined_block_rows)) + { + added_columns.offsets_to_replicate->resize_assume_reserved(i); + added_columns.filter.resize_assume_reserved(i); + break; + } + } + + bool right_row_found = false; + + KnownRowsHolder known_rows; + for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) + { + const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(i); + using FindResult = typename KeyGetter::FindResult; + auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); + + if (find_result.isFound()) + { + right_row_found = true; + auto & mapped = find_result.getMapped(); + if constexpr (join_features.is_asof_join) + { + const IColumn & left_asof_key = added_columns.leftAsofKey(); + + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref.block) + { + setUsed(added_columns.filter, i); + if constexpr (flag_per_row) + used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); + else + used_flags.template setUsed(find_result); + + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); + } + else + addNotFoundRow(added_columns, current_offset); + } + else if constexpr (join_features.is_all_join) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) + { + /// Use first appeared left key + it needs left columns replication + bool used_once = used_flags.template setUsedOnce(find_result); + if (used_once) + { + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + setUsed(added_columns.filter, i); + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + } + else if constexpr (join_features.is_any_join && join_features.inner) + { + bool used_once = used_flags.template setUsedOnce(find_result); + + /// Use first appeared left key only + if (used_once) + { + setUsed(added_columns.filter, i); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); + } + + break; + } + else if constexpr (join_features.is_any_join && join_features.full) + { + /// TODO + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(find_result); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); + + if (join_features.is_any_or_semi_join) + { + break; + } + } + } + } + + if (!right_row_found) + { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(added_columns.filter, i); + addNotFoundRow(added_columns, current_offset); + } + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[i] = current_offset; + } + } + + added_columns.applyLazyDefaults(); + return i; +} + +template +template +void HashJoinMethods::setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]) +{ + if constexpr (need_filter) + filter[pos] = 1; +} + +template +template +ColumnPtr HashJoinMethods::buildAdditionalFilter( + size_t left_start_row, + const std::vector & selected_rows, + const std::vector & row_replicate_offset, + AddedColumns & added_columns) +{ + ColumnPtr result_column; + do + { + if (selected_rows.empty()) + { + result_column = ColumnUInt8::create(); + break; + } + const Block & sample_right_block = *selected_rows.begin()->block; + if (!sample_right_block || !added_columns.additional_filter_expression) + { + auto filter = ColumnUInt8::create(); + filter->insertMany(1, selected_rows.size()); + result_column = std::move(filter); + break; + } + + auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); + if (required_cols.empty()) + { + Block block; + added_columns.additional_filter_expression->execute(block); + result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); + break; + } + NameSet required_column_names; + for (auto & col : required_cols) + required_column_names.insert(col.name); + + Block executed_block; + size_t right_col_pos = 0; + for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) + { + if (required_column_names.contains(col.name)) + { + auto new_col = col.column->cloneEmpty(); + for (const auto & selected_row : selected_rows) + { + const auto & src_col = selected_row.block->getByPosition(right_col_pos); + new_col->insertFrom(*src_col.column, selected_row.row_num); + } + executed_block.insert({std::move(new_col), col.type, col.name}); + } + right_col_pos += 1; + } + if (!executed_block) + { + result_column = ColumnUInt8::create(); + break; + } + + for (const auto & col_name : required_column_names) + { + const auto * src_col = added_columns.left_block.findByName(col_name); + if (!src_col) + continue; + auto new_col = src_col->column->cloneEmpty(); + size_t prev_left_offset = 0; + for (size_t i = 1; i < row_replicate_offset.size(); ++i) + { + const size_t & left_offset = row_replicate_offset[i]; + size_t rows = left_offset - prev_left_offset; + if (rows) + new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); + prev_left_offset = left_offset; + } + executed_block.insert({std::move(new_col), src_col->type, col_name}); + } + if (!executed_block) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", + required_cols.toString(), + sample_right_block.dumpNames(), + added_columns.left_block.dumpNames()); + } + + for (const auto & col : executed_block.getColumnsWithTypeAndName()) + if (!col.column || !col.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); + + added_columns.additional_filter_expression->execute(executed_block); + result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); + executed_block.clear(); + } while (false); + + result_column = result_column->convertToFullIfNeeded(); + if (result_column->isNullable()) + { + /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros + /// Trying to avoid copying data, since we are the only owner of the column. + ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); + + MutableColumnPtr mutable_column; + { + ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); + result_column.reset(); + mutable_column = IColumn::mutate(std::move(nested_column)); + } + + auto & column_data = assert_cast(*mutable_column).getData(); + const auto & mask_column_data = assert_cast(*mask_column).getData(); + for (size_t i = 0; i < column_data.size(); ++i) + { + if (mask_column_data[i]) + column_data[i] = 0; + } + return mutable_column; + } + return result_column; +} + +template +template +size_t HashJoinMethods::joinRightColumnsWithAddtitionalFilter( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], + bool need_filter [[maybe_unused]], + bool flag_per_row [[maybe_unused]]) +{ + constexpr JoinFeatures join_features; + size_t left_block_rows = added_columns.rows_to_add; + if (need_filter) + added_columns.filter = IColumn::Filter(left_block_rows, 0); + + std::unique_ptr pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(left_block_rows); + + std::vector row_replicate_offset; + row_replicate_offset.reserve(left_block_rows); + + using FindResult = typename KeyGetter::FindResult; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t left_row_iter = 0; + PreSelectedRows selected_rows; + selected_rows.reserve(left_block_rows); + std::vector find_results; + find_results.reserve(left_block_rows); + bool exceeded_max_block_rows = false; + IColumn::Offset total_added_rows = 0; + IColumn::Offset current_added_rows = 0; + + auto collect_keys_matched_rows_refs = [&]() + { + pool = std::make_unique(); + find_results.clear(); + row_replicate_offset.clear(); + row_replicate_offset.push_back(0); + current_added_rows = 0; + selected_rows.clear(); + for (; left_row_iter < left_block_rows; ++left_row_iter) + { + if constexpr (join_features.need_replication) + { + if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) + { + break; + } + } + KnownRowsHolder all_flag_known_rows; + KnownRowsHolder single_flag_know_rows; + for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) + { + const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; + if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); + auto find_result = row_acceptable + ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) + : FindResult(); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + find_results.push_back(find_result); + if (flag_per_row) + addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); + else + addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); + } + } + row_replicate_offset.push_back(current_added_rows); + } + }; + + auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) + { + const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); + + size_t prev_replicated_row = 0; + auto selected_right_row_it = selected_rows.begin(); + size_t find_result_index = 0; + for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) + { + bool any_matched = false; + /// For right join, flag_per_row is true, we need mark used flags for each row. + if (flag_per_row) + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + if constexpr (join_features.is_semi_join || join_features.is_any_join) + { + auto used_once = used_flags.template setUsedOnce( + selected_right_row_it->block, selected_right_row_it->row_num, 0); + if (used_once) + { + total_added_rows += 1; + added_columns.appendFromBlock( + *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + } + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + } + else + { + total_added_rows += 1; + added_columns.appendFromBlock( + *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + used_flags.template setUsed( + selected_right_row_it->block, selected_right_row_it->row_num, 0); + } + } + ++selected_right_row_it; + } + } + else + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if constexpr (join_features.is_anti_join) + { + any_matched |= filter_flags[replicated_row]; + } + else if constexpr (join_features.need_replication) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock( + *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + else + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock( + *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + total_added_rows += 1; + selected_right_row_it = selected_right_row_it + row_replicate_offset[i] - replicated_row; + break; + } + else + ++selected_right_row_it; + } + } + } + + + if constexpr (join_features.is_anti_join) + { + if (!any_matched) + { + if constexpr (join_features.left) + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + addNotFoundRow(added_columns, total_added_rows); + } + } + else + { + if (!any_matched) + { + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if constexpr (join_features.add_missing) + added_columns.applyLazyDefaults(); + } + } + find_result_index += (prev_replicated_row != row_replicate_offset[i]); + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; + } + prev_replicated_row = row_replicate_offset[i]; + } + }; + + while (left_row_iter < left_block_rows && !exceeded_max_block_rows) + { + auto left_start_row = left_row_iter; + collect_keys_matched_rows_refs(); + if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " + "left_start_row: {}", + selected_rows.size(), + current_added_rows, + row_replicate_offset.size(), + left_row_iter, + left_start_row); + } + auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); + copy_final_matched_rows(left_start_row, filter_col); + + if constexpr (join_features.need_replication) + { + // Add a check for current_added_rows to avoid run the filter expression on too small size batch. + if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) + exceeded_max_block_rows = true; + } + } + + if constexpr (join_features.need_replication) + { + added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); + added_columns.filter.resize_assume_reserved(left_row_iter); + } + added_columns.applyLazyDefaults(); + return left_row_iter; +} + +template +Block HashJoinMethods::sliceBlock(Block & block, size_t num_rows) +{ + size_t total_rows = block.rows(); + if (num_rows >= total_rows) + return {}; + size_t remaining_rows = total_rows - num_rows; + Block remaining_block = block.cloneEmpty(); + for (size_t i = 0; i < block.columns(); ++i) + { + auto & col = block.getByPosition(i); + remaining_block.getByPosition(i).column = col.column->cut(num_rows, remaining_rows); + col.column = col.column->cut(0, num_rows); + } + return remaining_block; +} + +template +ColumnWithTypeAndName HashJoinMethods::copyLeftKeyColumnToRight( + const DataTypePtr & right_key_type, + const String & renamed_right_column, + const ColumnWithTypeAndName & left_column, + const IColumn::Filter * null_map_filter) +{ + ColumnWithTypeAndName right_column = left_column; + right_column.name = renamed_right_column; + + if (null_map_filter) + right_column.column = JoinCommon::filterWithBlanks(right_column.column, *null_map_filter); + + bool should_be_nullable = isNullableOrLowCardinalityNullable(right_key_type); + if (null_map_filter) + correctNullabilityInplace(right_column, should_be_nullable, *null_map_filter); + else + correctNullabilityInplace(right_column, should_be_nullable); + + if (!right_column.type->equals(*right_key_type)) + { + right_column.column = castColumnAccurate(right_column, right_key_type); + right_column.type = right_key_type; + } + + right_column.column = right_column.column->convertToFullColumnIfConst(); + return right_column; +} + +template +void HashJoinMethods::correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable) +{ + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + } + else + { + /// We have to replace values masked by NULLs with defaults. + if (column.column) + if (const auto * nullable_column = checkAndGetColumn(&*column.column)) + column.column = JoinCommon::filterWithBlanks(column.column, nullable_column->getNullMapColumn().getData(), true); + + JoinCommon::removeColumnNullability(column); + } +} + +template +void HashJoinMethods::correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) +{ + if (nullable) + { + JoinCommon::convertColumnToNullable(column); + if (column.type->isNullable() && !negative_null_map.empty()) + { + MutableColumnPtr mutable_column = IColumn::mutate(std::move(column.column)); + assert_cast(*mutable_column).applyNegatedNullMap(negative_null_map); + column.column = std::move(mutable_column); + } + } + else + JoinCommon::removeColumnNullability(column); +} +} + diff --git a/src/Interpreters/HashJoin/InnerHashJoin.cpp b/src/Interpreters/HashJoin/InnerHashJoin.cpp index 85aedf3a8e5..258e3143996 100644 --- a/src/Interpreters/HashJoin/InnerHashJoin.cpp +++ b/src/Interpreters/HashJoin/InnerHashJoin.cpp @@ -1,5 +1,5 @@ -#include +#include namespace DB { diff --git a/src/Interpreters/HashJoin/LeftHashJoin.cpp b/src/Interpreters/HashJoin/LeftHashJoin.cpp index a53ffaac0b5..4e06789570e 100644 --- a/src/Interpreters/HashJoin/LeftHashJoin.cpp +++ b/src/Interpreters/HashJoin/LeftHashJoin.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/Interpreters/HashJoin/RightHashJoin.cpp b/src/Interpreters/HashJoin/RightHashJoin.cpp index 8e304754f5c..d9d41d7d63c 100644 --- a/src/Interpreters/HashJoin/RightHashJoin.cpp +++ b/src/Interpreters/HashJoin/RightHashJoin.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { From bd4d648f63d56d11c729f719f864a8b9985c43c3 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 23 Jul 2024 10:41:57 +0800 Subject: [PATCH 024/132] update doc --- docs/en/sql-reference/statements/select/join.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 96d9d26977d..b228f7025c4 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -186,7 +186,7 @@ Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`. ::: -Clickhouse currently supports `ALL INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`. +Clickhouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`. **Example** From 58b7ac2264eb6d2ba83d634e1d52b874ca54d9c7 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 7 Aug 2024 11:36:01 +0800 Subject: [PATCH 025/132] update --- src/Interpreters/HashJoin/HashJoin.cpp | 2 +- src/Interpreters/HashJoin/HashJoinMethods.h | 1 + src/Interpreters/HashJoin/InnerHashJoin.cpp | 1 + src/Interpreters/joinDispatch.h | 20 +++-- ..._join_on_inequal_expression_fast.reference | 78 +++++++++++++++++++ ...006_join_on_inequal_expression_fast.sql.j2 | 31 ++++++++ 6 files changed, 127 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 769cb574ed7..4033d1e3035 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -1327,7 +1327,7 @@ void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additiona bool is_supported = ((strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind))) || ((strictness == JoinStrictness::Semi || strictness == JoinStrictness::Any || strictness == JoinStrictness::Anti) - && (isLeft(kind) || isRight(kind))); + && (isLeft(kind) || isRight(kind))) || (strictness == JoinStrictness::Any && (isInner(kind))); if (!is_supported) { throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index e3b8fbc1737..3b7a67467e3 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -185,6 +185,7 @@ extern template class HashJoinMethods; extern template class HashJoinMethods; +extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; extern template class HashJoinMethods; diff --git a/src/Interpreters/HashJoin/InnerHashJoin.cpp b/src/Interpreters/HashJoin/InnerHashJoin.cpp index 258e3143996..69f4c620cb8 100644 --- a/src/Interpreters/HashJoin/InnerHashJoin.cpp +++ b/src/Interpreters/HashJoin/InnerHashJoin.cpp @@ -5,6 +5,7 @@ namespace DB { template class HashJoinMethods; template class HashJoinMethods; +template class HashJoinMethods; template class HashJoinMethods; template class HashJoinMethods; template class HashJoinMethods; diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h index 982c56e8210..5d4bd8f92e5 100644 --- a/src/Interpreters/joinDispatch.h +++ b/src/Interpreters/joinDispatch.h @@ -12,6 +12,15 @@ namespace DB { +/// HashJoin::MapsOne is more efficient, it only store one row for each key in the map. It is recommended to use it whenever possible. +/// When only need to match only one row from right table, use HashJoin::MapsOne. For example, LEFT ANY/SEMI/ANTI. +/// +/// HashJoin::MapsAll will store all rows for each key in the map. It is used when need to match multiple rows from right table. +/// For example, RIGHT ANY/ALL, FULL JOIN, INNER JOIN. +/// +/// prefer_use_maps_all is true when there is mixed inequal condition in the join condition. For example, `t1.a = t2.a AND t1.b > t2.b`. +/// In this case, we need to use HashJoin::MapsAll to store all rows for each key in the map. We will select all matched rows from the map +/// and filter them by `t1.b > t2.b`. template struct MapGetter; @@ -21,8 +30,9 @@ template struct MapGetter struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; -template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; @@ -33,14 +43,14 @@ template struct MapGetter struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -/// Only SEMI LEFT and SEMI RIGHT are valid. INNER and FULL are here for templates instantiation. +/// Only ANTI LEFT and ANTI RIGHT are valid. INNER and FULL are here for templates instantiation. template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference index 3e413afd98e..11ac01d24d5 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -1,3 +1,4 @@ +03006_join_on_inequal_expression_fast.sql -- { echoOn } SET join_algorithm='hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); @@ -596,6 +597,39 @@ SELECT t1.*, t2.* from t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key and (t1.a < t2. 0 0 \N key4 F 1 1 1 SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT ANTI JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); SET join_algorithm='hash'; +SELECT t1.* FROM t1 LEFT ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 b 2 3 2 +key1 c 3 2 1 +key1 d 4 7 2 +key1 e 5 5 5 +key2 a2 1 1 1 +key4 f 2 3 4 +SELECT t1.* FROM t1 LEFT SEMI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 b 2 3 2 +key1 c 3 2 1 +key1 d 4 7 2 +key2 a2 1 1 1 +key4 f 2 3 4 +SELECT t1.* FROM t1 LEFT ANTI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 e 5 5 5 +SELECT t1.* FROM t1 RIGHT ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +SELECT t1.* FROM t1 RIGHT SEMI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +key1 a 1 1 2 +SELECT t1.* FROM t1 RIGHT ANTI JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +SET join_algorithm='hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); key1 a 1 1 2 key1 A 1 2 1 key1 a 1 1 2 key1 B 2 1 2 @@ -666,3 +700,47 @@ key2 a2 1 1 1 key1 A 1 2 1 key2 a2 1 1 1 key3 a3 1 1 1 key2 a2 1 1 1 key4 F 1 1 1 key4 f 2 3 4 key1 B 2 1 2 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 c 3 2 1 key1 B 2 1 2 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER ANY JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER ANY JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SET join_algorithm='hash'; +SELECT t1.* FROM t1 INNER ANY JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +key1 a 1 1 2 +key1 b 2 3 2 +key1 c 3 2 1 +key1 d 4 7 2 +key2 a2 1 1 1 +key4 f 2 3 4 diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 index 1bf5a7870e7..b300881c562 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 @@ -34,12 +34,43 @@ SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_stri {% endfor -%} {% endfor -%} +{% for algorithm in ['hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'RIGHT'] -%} +{% for join_strictness in ['ANY', 'SEMI', 'ANTI'] -%} +SELECT t1.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +{% endfor -%} +{% endfor -%} +{% endfor -%} + {% for algorithm in ['hash'] -%} SET join_algorithm='{{ algorithm }}'; {% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); {% endfor -%} {% endfor -%} + +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['INNER'] -%} +{% for join_strictness in ['ANY'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} {{ join_strictness }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +{% endfor -%} +{% endfor -%} +{% endfor -%} + +{% for algorithm in ['hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['INNER'] -%} +{% for join_strictness in ['ANY'] -%} +SELECT t1.* FROM t1 {{ join_type }} {{ join_strictness }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY ALL; +{% endfor -%} +{% endfor -%} +{% endfor -%} + -- { echoOff } -- test error messages From 5eb896b9f1976feaa423071919e65d22e09da4ea Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 7 Aug 2024 10:43:41 +0200 Subject: [PATCH 026/132] Add documentation for toDecimal32 and variants --- .../functions/type-conversion-functions.md | 471 ++++++++++++------ 1 file changed, 305 insertions(+), 166 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 1e618b8cdab..24055bb99b7 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -95,7 +95,7 @@ SELECT toInt8(-8), toInt8(-8.8), toInt8('-8') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -156,7 +156,7 @@ Query: SELECT toInt8OrZero('-8'), toInt8OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -216,7 +216,7 @@ Query: SELECT toInt8OrNull('-8'), toInt8OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -281,7 +281,7 @@ Query: SELECT toInt8OrDefault('-8', CAST('-1', 'Int8')), toInt8OrDefault('abc', CAST('-1', 'Int8')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -345,7 +345,7 @@ SELECT toInt16(-16), toInt16(-16.16), toInt16('-16') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -406,7 +406,7 @@ Query: SELECT toInt16OrZero('-16'), toInt16OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -466,7 +466,7 @@ Query: SELECT toInt16OrNull('-16'), toInt16OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -531,7 +531,7 @@ Query: SELECT toInt16OrDefault('-16', CAST('-1', 'Int16')), toInt16OrDefault('abc', CAST('-1', 'Int16')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -595,7 +595,7 @@ SELECT toInt32(-32), toInt32(-32.32), toInt32('-32') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -656,7 +656,7 @@ Query: SELECT toInt32OrZero('-32'), toInt32OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -715,7 +715,7 @@ Query: SELECT toInt32OrNull('-32'), toInt32OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -780,7 +780,7 @@ Query: SELECT toInt32OrDefault('-32', CAST('-1', 'Int32')), toInt32OrDefault('abc', CAST('-1', 'Int32')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -844,7 +844,7 @@ SELECT toInt64(-64), toInt64(-64.64), toInt64('-64') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -905,7 +905,7 @@ Query: SELECT toInt64OrZero('-64'), toInt64OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -965,7 +965,7 @@ Query: SELECT toInt64OrNull('-64'), toInt64OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1030,7 +1030,7 @@ Query: SELECT toInt64OrDefault('-64', CAST('-1', 'Int64')), toInt64OrDefault('abc', CAST('-1', 'Int64')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1093,7 +1093,7 @@ SELECT toInt128(-128), toInt128(-128.8), toInt128('-128') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1154,7 +1154,7 @@ Query: SELECT toInt128OrZero('-128'), toInt128OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1214,7 +1214,7 @@ Query: SELECT toInt128OrNull('-128'), toInt128OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1279,7 +1279,7 @@ Query: SELECT toInt128OrDefault('-128', CAST('-1', 'Int128')), toInt128OrDefault('abc', CAST('-1', 'Int128')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1342,7 +1342,7 @@ SELECT toInt256(-256), toInt256(-256.256), toInt256('-256') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1403,7 +1403,7 @@ Query: SELECT toInt256OrZero('-256'), toInt256OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1463,7 +1463,7 @@ Query: SELECT toInt256OrNull('-256'), toInt256OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1528,7 +1528,7 @@ Query: SELECT toInt256OrDefault('-256', CAST('-1', 'Int256')), toInt256OrDefault('abc', CAST('-1', 'Int256')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1592,7 +1592,7 @@ SELECT toUInt8(8), toUInt8(8.8), toUInt8('8') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1653,7 +1653,7 @@ Query: SELECT toUInt8OrZero('-8'), toUInt8OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1713,7 +1713,7 @@ Query: SELECT toUInt8OrNull('8'), toUInt8OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1778,7 +1778,7 @@ Query: SELECT toUInt8OrDefault('8', CAST('0', 'UInt8')), toUInt8OrDefault('abc', CAST('0', 'UInt8')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1842,7 +1842,7 @@ SELECT toUInt16(16), toUInt16(16.16), toUInt16('16') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1903,7 +1903,7 @@ Query: SELECT toUInt16OrZero('16'), toUInt16OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -1963,7 +1963,7 @@ Query: SELECT toUInt16OrNull('16'), toUInt16OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2028,7 +2028,7 @@ Query: SELECT toUInt16OrDefault('16', CAST('0', 'UInt16')), toUInt16OrDefault('abc', CAST('0', 'UInt16')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2092,7 +2092,7 @@ SELECT toUInt32(32), toUInt32(32.32), toUInt32('32') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2154,7 +2154,7 @@ Query: SELECT toUInt32OrZero('32'), toUInt32OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2214,7 +2214,7 @@ Query: SELECT toUInt32OrNull('32'), toUInt32OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2279,7 +2279,7 @@ Query: SELECT toUInt32OrDefault('32', CAST('0', 'UInt32')), toUInt32OrDefault('abc', CAST('0', 'UInt32')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2343,7 +2343,7 @@ SELECT toUInt64(64), toUInt64(64.64), toUInt64('64') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2404,7 +2404,7 @@ Query: SELECT toUInt64OrZero('64'), toUInt64OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2464,7 +2464,7 @@ Query: SELECT toUInt64OrNull('64'), toUInt64OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2529,7 +2529,7 @@ Query: SELECT toUInt64OrDefault('64', CAST('0', 'UInt64')), toUInt64OrDefault('abc', CAST('0', 'UInt64')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2592,7 +2592,7 @@ SELECT toUInt128(128), toUInt128(128.8), toUInt128('128') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2653,7 +2653,7 @@ Query: SELECT toUInt128OrZero('128'), toUInt128OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2713,7 +2713,7 @@ Query: SELECT toUInt128OrNull('128'), toUInt128OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2778,7 +2778,7 @@ Query: SELECT toUInt128OrDefault('128', CAST('0', 'UInt128')), toUInt128OrDefault('abc', CAST('0', 'UInt128')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2841,7 +2841,7 @@ SELECT toUInt256(256), toUInt256(256.256), toUInt256('256') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2902,7 +2902,7 @@ Query: SELECT toUInt256OrZero('256'), toUInt256OrZero('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -2962,7 +2962,7 @@ Query: SELECT toUInt256OrNull('256'), toUInt256OrNull('abc') -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -3027,7 +3027,7 @@ Query: SELECT toUInt256OrDefault('-256', CAST('0', 'UInt256')), toUInt256OrDefault('abc', CAST('0', 'UInt256')) -FORMAT vertical; +FORMAT Vertical; ``` Result: @@ -3542,173 +3542,312 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDateTime64OrDefault -## toDecimal(32\|64\|128\|256) +## toDecimal32 -Converts `value` to the [Decimal](../data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts an input value to a value of type [`Decimal(9, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. -- `toDecimal32(value, S)` -- `toDecimal64(value, S)` -- `toDecimal128(value, S)` -- `toDecimal256(value, S)` +**Syntax** -## toDecimal(32\|64\|128\|256)OrNull - -Converts an input string to a [Nullable(Decimal(P,S))](../data-types/decimal.md) data type value. This family of functions includes: - -- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. -- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. -- `toDecimal128OrNull(expr, S)` — Results in `Nullable(Decimal128(S))` data type. -- `toDecimal256OrNull(expr, S)` — Results in `Nullable(Decimal256(S))` data type. - -These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. +```sql +toDecimal32(expr, S) +``` **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32('0xc0fe', 1);`. + +:::note +Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: **Returned value** -A value in the `Nullable(Decimal(P,S))` data type. The value contains: +- Value of type `Decimal(9, S)`. [Decimal32(S)](../data-types/int-uint.md). -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- `NULL`, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. - -**Examples** +**Example** Query: -``` sql -SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val); +```sql +SELECT + toDecimal32(2, 1) AS a, toTypeName(a) AS type_a, + toDecimal32(4.2, 2) AS b, toTypeName(b) AS type_b, + toDecimal32('4.2', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; ``` Result: ```response -┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ -│ -1.111 │ Nullable(Decimal(9, 5)) │ -└────────┴────────────────────────────────────────────────────┘ +Row 1: +────── +a: 2 +type_a: Decimal(9, 1) +b: 4.2 +type_b: Decimal(9, 2) +c: 4.2 +type_c: Decimal(9, 3) ``` -Query: +**See also** -``` sql -SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val); +- [`toDecimal32OrZero`](#todecimal32orzero). +- [`toDecimal32OrNull`](#todecimal32ornull). +- [`toDecimal32OrDefault`](#todecimal32ordefault). + +## toDecimal32OrZero + +Like [`toDecimal32`](#todecimal32), this function converts an input value to a value of type [Decimal(9, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal32OrZero(expr, S) ``` -Result: - -```response -┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ -│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ -└──────┴────────────────────────────────────────────────────┘ -``` - - -## toDecimal(32\|64\|128\|256)OrDefault - -Converts an input string to a [Decimal(P,S)](../data-types/decimal.md) data type value. This family of functions includes: - -- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. - -These functions should be used instead of `toDecimal*()` functions, if you prefer to get a default value instead of an exception in the event of an input value parsing error. - **Arguments** -- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrZero('0xc0fe', 1);`. + +:::note +Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: **Returned value** -A value in the `Decimal(P,S)` data type. The value contains: - -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. - -**Examples** - -Query: - -``` sql -SELECT toDecimal32OrDefault(toString(-1.111), 5) AS val, toTypeName(val); -``` - -Result: - -```response -┌────val─┬─toTypeName(toDecimal32OrDefault(toString(-1.111), 5))─┐ -│ -1.111 │ Decimal(9, 5) │ -└────────┴───────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT toDecimal32OrDefault(toString(-1.111), 2) AS val, toTypeName(val); -``` - -Result: - -```response -┌─val─┬─toTypeName(toDecimal32OrDefault(toString(-1.111), 2))─┐ -│ 0 │ Decimal(9, 2) │ -└─────┴───────────────────────────────────────────────────────┘ -``` - -## toDecimal(32\|64\|128\|256)OrZero - -Converts an input value to the [Decimal(P,S)](../data-types/decimal.md) data type. This family of functions includes: - -- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrZero( expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrZero( expr, S)` — Results in `Decimal256(S)` data type. - -These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. - -**Arguments** - -- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. - -**Returned value** - -A value in the `Nullable(Decimal(P,S))` data type. The value contains: - -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- 0 with `S` decimal places, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Value of type `Decimal(9, S)` if successful, otherwise `0` with `S` decimal places. [Decimal32(S)](../data-types/decimal.md). **Example** Query: ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val); +SELECT + toDecimal32OrZero(toString(-1.111), 5) AS val, + toTypeName(val) +FORMAT Vertical; ``` Result: ```response -┌────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ -│ -1.111 │ Decimal(9, 5) │ -└────────┴────────────────────────────────────────────────────┘ +Row 1: +────── +val: -1.111 +toTypeName(val): Decimal(9, 5) ``` Query: ``` sql -SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val); +SELECT + toDecimal32OrZero(toString(-1.111), 2) AS val, + toTypeName(val) +FORMAT Vertical; ``` Result: ```response -┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ -│ 0.00 │ Decimal(9, 2) │ -└──────┴────────────────────────────────────────────────────┘ +Row 1: +────── +val: -1.11 +toTypeName(val): Decimal(9, 2) +``` + +**See also** + +- [`toDecimal32`](#todecimal32). +- [`toDecimal32OrNull`](#todecimal32ornull). +- [`toDecimal32OrDefault`](#todecimal32ordefault). + +## toDecimal32OrNull + +Like [`toDecimal32`](#todecimal32), this function converts an input value to a value of type [Nullable(Decimal(9, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal32OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrNull('0xc0fe', 1);`. + +:::note +Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(9, S))` if successful, otherwise value `NULL` of the same type. [Decimal32(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal32OrNull(toString(-1.111), 5) AS val, + toTypeName(val) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +val: -1.111 +toTypeName(val): Nullable(Decimal(9, 5)) +``` + +Query: + +``` sql +SELECT + toDecimal32OrNull(toString(-1.111), 2) AS val, + toTypeName(val) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +val: -1.11 +toTypeName(val): Nullable(Decimal(9, 2)) +``` + +**See also** + +- [`toDecimal32`](#todecimal32). +- [`toDecimal32OrZero`](#todecimal32orzero). +- [`toDecimal32OrDefault`](#todecimal32ordefault). + +## toDecimal32OrDefault + +Like [`toDecimal32`](#todecimal32), this function converts an input value to a value of type [Decimal(9, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal32OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal32(S)` is unsuccessful. [Decimal32(S](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrDefault('0xc0fe', 1);`. + +:::note +Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(9, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal32(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal32OrDefault(toString(-1.111), 5) AS val, + toTypeName(val) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +val: -1.111 +toTypeName(val): Decimal(9, 5) +``` + +Query: + +``` sql +SELECT + toDecimal32OrDefault(toString(-1.111), 2) AS val, + toTypeName(val) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +val: -1.11 +toTypeName(val): Decimal(9, 2) +``` + +Query: + +``` sql +SELECT + toDecimal32OrDefault('Inf', 2, CAST('0', 'Decimal32(2)')) AS val, + toTypeName(val) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +val: 0 +toTypeName(val): Decimal(9, 2) ``` ## toString From 4f2b1c36b7115143a23462282dc5474ed5b90afd Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 7 Aug 2024 11:12:09 +0200 Subject: [PATCH 027/132] Fix typo from previous PR --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 24055bb99b7..5db44da3e2d 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -2362,7 +2362,7 @@ toUInt64('64'): 64 - [`toUInt64OrNull`](#touint64ornull). - [`toUInt64OrDefault`](#touint64ordefault). -## toInt64OrZero +## toUInt64OrZero Like [`toUInt64`](#touint64), this function converts an input value to a value of type [UInt64](../data-types/int-uint.md) but returns `0` in case of an error. From b76e4acbc0a260f5222249a250066c77d2fcaff8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 7 Aug 2024 11:13:56 +0200 Subject: [PATCH 028/132] fix another typo --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 5db44da3e2d..8e72fea7fdb 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1546,7 +1546,7 @@ toInt256OrDefault('abc', CAST('-1', 'Int256')): -1 - [`toInt256OrZero`](#toint256orzero). - [`toInt256OrNull`](#toint256ornull). -# toUInt8 +## toUInt8 Converts an input value to a value of type [`UInt8`](../data-types/int-uint.md). Throws an exception in case of an error. From 6172c56c1fd27f39d11914542f9e2dcb94fffd36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 7 Aug 2024 09:48:38 +0000 Subject: [PATCH 029/132] Split tests to separate vaguely correlated tests --- ...3217_filtering_in_storage_merge.reference} | 6 +---- .../03217_filtering_in_storage_merge.sql | 16 +++++++++++ ...03217_filtering_in_system_tables.reference | 4 +++ ...l => 03217_filtering_in_system_tables.sql} | 27 +++++++------------ 4 files changed, 30 insertions(+), 23 deletions(-) rename tests/queries/0_stateless/{03217_read_rows_in_system_tables.reference => 03217_filtering_in_storage_merge.reference} (54%) create mode 100644 tests/queries/0_stateless/03217_filtering_in_storage_merge.sql create mode 100644 tests/queries/0_stateless/03217_filtering_in_system_tables.reference rename tests/queries/0_stateless/{03217_read_rows_in_system_tables.sql => 03217_filtering_in_system_tables.sql} (55%) diff --git a/tests/queries/0_stateless/03217_read_rows_in_system_tables.reference b/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference similarity index 54% rename from tests/queries/0_stateless/03217_read_rows_in_system_tables.reference rename to tests/queries/0_stateless/03217_filtering_in_storage_merge.reference index b21ead49b1e..d366ad04c39 100644 --- a/tests/queries/0_stateless/03217_read_rows_in_system_tables.reference +++ b/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference @@ -1,10 +1,6 @@ -information_schema tables -default test_replica_1 r1 Expression ((Project names + Projection)) Aggregating Expression (Before GROUP BY) ReadFromMerge Filter (( + ( + ))) - ReadFromMergeTree (default.test_replica_1) -1 1 -1 1 + ReadFromMergeTree (default.test_03217_merge_replica_1) diff --git a/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql new file mode 100644 index 00000000000..5ecc1e7c672 --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql @@ -0,0 +1,16 @@ +CREATE TABLE test_03217_merge_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r1') + ORDER BY x; +CREATE TABLE test_03217_merge_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r2') + ORDER BY x; + + +CREATE TABLE test_03217_all_replicas (x UInt32) + ENGINE = Merge(currentDatabase(), 'test_03217_merge_replica_*'); + +INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10); +SYSTEM SYNC REPLICA test_03217_merge_replica_2; + +-- If the filter on _table is not applied, then the plan will show both replicas +EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table; diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference new file mode 100644 index 00000000000..218fddf92e0 --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference @@ -0,0 +1,4 @@ +information_schema tables +default test_03217_system_tables_replica_1 r1 +1 1 +1 1 diff --git a/tests/queries/0_stateless/03217_read_rows_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql similarity index 55% rename from tests/queries/0_stateless/03217_read_rows_in_system_tables.sql rename to tests/queries/0_stateless/03217_filtering_in_system_tables.sql index 3bea04ccccf..bbc755e478d 100644 --- a/tests/queries/0_stateless/03217_read_rows_in_system_tables.sql +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -1,27 +1,18 @@ +-- If filtering is not done correctly on databases, then this query report to read 3 rows, which are: `system.tables`, `information_schema.tables` and `INFORMATION_SCHEMA.tables` SELECT database, table FROM system.tables WHERE database = 'information_schema' AND table = 'tables'; --- To verify StorageSystemReplicas applies the filter properly -CREATE TABLE test_replica_1(x UInt32) - ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217/test_replica', 'r1') +CREATE TABLE test_03217_system_tables_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r1') ORDER BY x; -CREATE TABLE test_replica_2(x UInt32) - ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217/test_replica', 'r2') +CREATE TABLE test_03217_system_tables_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r2') ORDER BY x; -SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = 'test_replica_1' AND replica_name = 'r1'; - - --- To verify StorageMerge -CREATE TABLE all_replicas (x UInt32) - ENGINE = Merge(currentDatabase(), 'test_replica_*'); - -INSERT INTO test_replica_1 SELECT number AS x FROM numbers(10); -SYSTEM SYNC REPLICA test_replica_2; --- If the filter not applied, then the plan will show both replicas -EXPLAIN SELECT _table, count() FROM all_replicas WHERE _table = 'test_replica_1' AND x >= 0 GROUP BY _table; +-- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables +SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name = 'r1'; SYSTEM FLUSH LOGS; --- argMin-argMax make the test repeatable +-- argMin-argMax is necessary to make the test repeatable -- StorageSystemTables SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 @@ -30,5 +21,5 @@ SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_ -- StorageSystemReplicas SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 - AND query LIKE '%SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = \'test_replica_1\' AND replica_name = \'r1\';' + AND query LIKE '%SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name = \'r1\';' AND type = 'QueryFinish'; From bcc5201c99e00998beab8088a988a66f921415b6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 30 Jul 2024 17:17:07 +0200 Subject: [PATCH 030/132] init --- src/Common/SystemLogBase.cpp | 138 ++++++++++++-------- src/Common/SystemLogBase.h | 58 +++++--- src/Interpreters/InterpreterSystemQuery.cpp | 9 +- src/Interpreters/SystemLog.cpp | 38 +++--- 4 files changed, 148 insertions(+), 95 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 7d2c15714e2..748cf4744ae 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -27,12 +27,15 @@ #include #include +#define LOGICAL_IF_THEN(A, B) (!(A) || !!(B)) + namespace DB { namespace ErrorCodes { extern const int TIMEOUT_EXCEEDED; + extern const int ABORTED; } ISystemLog::~ISystemLog() = default; @@ -86,9 +89,8 @@ void SystemLogQueue::push(LogElement&& element) // by one, under exclusive lock, so we will see each message count. // It is enough to only wake the flushing thread once, after the message // count increases past half available size. - const uint64_t queue_end = queue_front_index + queue.size(); - requested_flush_up_to = std::max(requested_flush_up_to, queue_end); - + const auto last_log_index = queue_front_index + queue.size(); + requested_flush_index = std::max(requested_flush_index, last_log_index); flush_event.notify_all(); } @@ -127,20 +129,46 @@ template void SystemLogQueue::handleCrash() { if (settings.notify_flush_on_crash) - notifyFlush(/* force */ true); + { + notifyFlush(getLastLogIndex(), /* should_prepare_tables_anyway */ true); + } } template -void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) +void SystemLogQueue::notifyFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) { + std::unique_lock lock(mutex); + // Publish our flush request, taking care not to overwrite the requests + // made by other threads. + force_prepare_tables_requested |= should_prepare_tables_anyway; + requested_flush_index = std::max(requested_flush_index, expected_flushed_index); + flush_event.notify_all(); +} + +template +void SystemLogQueue::waitFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + LOG_DEBUG(log, "Requested flush up to offset {}", expected_flushed_index); + // Use an arbitrary timeout to avoid endless waiting. 60s proved to be // too fast for our parallel functional tests, probably because they // heavily load the disk. const int timeout_seconds = 180; + std::unique_lock lock(mutex); - bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] + + // there is no obligation to call notifyFlush before waitFlush, than we have to be sure that flush_event has been triggered + force_prepare_tables_requested |= should_prepare_tables_anyway; + if (requested_flush_index < expected_flushed_index) { - return flushed_up_to >= expected_flushed_up_to && !is_force_prepare_tables; + requested_flush_index = expected_flushed_index; + flush_event.notify_all(); + } + + auto result = confirm_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] + { + const bool if_should_prepare_then_it_is_done = LOGICAL_IF_THEN(should_prepare_tables_anyway, prepare_tables_done); + return (flushed_index >= expected_flushed_index && if_should_prepare_then_it_is_done) || is_shutdown; }); if (!result) @@ -148,67 +176,54 @@ void SystemLogQueue::waitFlush(uint64_t expected_flushed_up_to) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout exceeded ({} s) while flushing system log '{}'.", toString(timeout_seconds), demangle(typeid(*this).name())); } -} - -template -uint64_t SystemLogQueue::notifyFlush(bool should_prepare_tables_anyway) -{ - uint64_t this_thread_requested_offset; + if (is_shutdown) { - std::lock_guard lock(mutex); - if (is_shutdown) - return uint64_t(-1); - - this_thread_requested_offset = queue_front_index + queue.size(); - - // Publish our flush request, taking care not to overwrite the requests - // made by other threads. - is_force_prepare_tables |= should_prepare_tables_anyway; - requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); - - flush_event.notify_all(); + throw Exception(ErrorCodes::ABORTED, "Shutdown has been called while flushing system log '{}'. Aborting.", + demangle(typeid(*this).name())); } - - LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); - return this_thread_requested_offset; } template -void SystemLogQueue::confirm(uint64_t to_flush_end) +SystemLogQueue::Index SystemLogQueue::getLastLogIndex() { std::lock_guard lock(mutex); - flushed_up_to = to_flush_end; - is_force_prepare_tables = false; - flush_event.notify_all(); + return queue_front_index + queue.size(); } template -typename SystemLogQueue::Index SystemLogQueue::pop(std::vector & output, - bool & should_prepare_tables_anyway, - bool & exit_this_thread) +void SystemLogQueue::confirm(SystemLogQueue::Index last_flashed_index) { - /// Call dtors and deallocate strings without holding the global lock - output.resize(0); + std::lock_guard lock(mutex); + prepare_tables_done = true; + flushed_index = std::max(flushed_index, last_flashed_index); + confirm_event.notify_all(); +} +template +typename SystemLogQueue::PopResult SystemLogQueue::pop() +{ std::unique_lock lock(mutex); - flush_event.wait_for(lock, - std::chrono::milliseconds(settings.flush_interval_milliseconds), - [&] () - { - return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables; - } - ); + + flush_event.wait_for(lock, std::chrono::milliseconds(settings.flush_interval_milliseconds), [&] () + { + const bool if_prepare_requested_and_it_is_not_done = force_prepare_tables_requested && !prepare_tables_done; + return requested_flush_index > flushed_index || if_prepare_requested_and_it_is_not_done || is_shutdown; + }); + + if (is_shutdown) + return PopResult{.is_shutdown = true}; queue_front_index += queue.size(); - // Swap with existing array from previous flush, to save memory - // allocations. - queue.swap(output); - should_prepare_tables_anyway = is_force_prepare_tables; + PopResult result; + result.logs_index = queue_front_index; + result.logs_elemets.swap(queue); - exit_this_thread = is_shutdown; - return queue_front_index; + const bool if_prepare_requested_and_it_is_not_done = force_prepare_tables_requested && !prepare_tables_done; + result.create_table_force = if_prepare_requested_and_it_is_not_done; + + return result; } template @@ -229,13 +244,21 @@ SystemLogBase::SystemLogBase( } template -void SystemLogBase::flush(bool force) +SystemLogBase::Index SystemLogBase::getLastLogIndex() { - uint64_t this_thread_requested_offset = queue->notifyFlush(force); - if (this_thread_requested_offset == uint64_t(-1)) - return; + return queue->getLastLogIndex(); +} - queue->waitFlush(this_thread_requested_offset); +template +void SystemLogBase::notifyFlush(Index expected_flushed_index) +{ + queue->notifyFlush(expected_flushed_index, /* should_prepare_tables_anyway */ true); +} + +template +void SystemLogBase::flush(Index expected_flushed_index) +{ + queue->waitFlush(expected_flushed_index, /* should_prepare_tables_anyway */ true); } template @@ -257,9 +280,6 @@ void SystemLogBase::add(LogElement element) queue->push(std::move(element)); } -template -void SystemLogBase::notifyFlush(bool force) { queue->notifyFlush(force); } - #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) @@ -267,3 +287,5 @@ SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE) } + +#undef LOGICAL_IF_THEN diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index b87fcf419d3..6c60ffafc4c 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -54,10 +54,19 @@ struct StorageID; class ISystemLog { public: + using Index = uint64_t; + virtual String getName() const = 0; - //// force -- force table creation (used for SYSTEM FLUSH LOGS) - virtual void flush(bool force = false) = 0; /// NOLINT + /// Return the index of the lastest added log element. That index no less than the flashed index. + /// The flashed index is the index of the last log element which has been flushed successfully. + /// Thereby all the records whose index is less than the flashed index are flushed already. + virtual Index getLastLogIndex() = 0; + /// Call this method to wake up the flush thread and flush the data in the background. It is non blocking call + virtual void notifyFlush(Index expected_flushed_index) = 0; + /// Call this method to wait intill the logs are flushed up to expected_flushed_index. It is blocking call. + virtual void flush(Index expected_flushed_index) = 0; + virtual void prepareTable() = 0; /// Start the background thread. @@ -97,24 +106,34 @@ struct SystemLogQueueSettings template class SystemLogQueue { - using Index = uint64_t; - public: + using Index = ISystemLog::Index; + explicit SystemLogQueue(const SystemLogQueueSettings & settings_); void shutdown(); // producer methods void push(LogElement && element); - Index notifyFlush(bool should_prepare_tables_anyway); - void waitFlush(Index expected_flushed_up_to); + + Index getLastLogIndex(); + void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway); + void waitFlush(Index expected_flushed_index, bool should_prepare_tables_anyway); /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash(); + struct PopResult + { + Index logs_index = 0; + std::vector logs_elemets = {}; + bool create_table_force = false; + bool is_shutdown = false; + }; + // consumer methods - Index pop(std::vector& output, bool & should_prepare_tables_anyway, bool & exit_this_thread); - void confirm(Index to_flush_end); + PopResult pop(); + void confirm(Index last_flashed_index); private: /// Data shared between callers of add()/flush()/shutdown(), and the saving thread @@ -124,22 +143,29 @@ private: // Queue is bounded. But its size is quite large to not block in all normal cases. std::vector queue; + // An always-incrementing index of the first message currently in the queue. // We use it to give a global sequential index to every message, so that we // can wait until a particular message is flushed. This is used to implement // synchronous log flushing for SYSTEM FLUSH LOGS. Index queue_front_index = 0; + // A flag that says we must create the tables even if the queue is empty. - bool is_force_prepare_tables = false; + bool force_prepare_tables_requested = false; + bool prepare_tables_done = false; + // Requested to flush logs up to this index, exclusive - Index requested_flush_up_to = 0; + Index requested_flush_index = 0; + // Flushed log up to this index, exclusive - Index flushed_up_to = 0; + Index flushed_index = 0; + // Logged overflow message at this queue front index Index logged_queue_full_at_index = -1; bool is_shutdown = false; + std::condition_variable confirm_event; std::condition_variable flush_event; const SystemLogQueueSettings settings; @@ -150,6 +176,7 @@ template class SystemLogBase : public ISystemLog { public: + using Index = ISystemLog::Index; using Self = SystemLogBase; explicit SystemLogBase( @@ -163,15 +190,16 @@ public: */ void add(LogElement element); + Index getLastLogIndex() override; + + void notifyFlush(Index expected_flushed_index) override; + /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. - void flush(bool force) override; + void flush(Index expected_flushed_index) override; /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash() override; - /// Non-blocking flush data in the buffer to disk. - void notifyFlush(bool force); - String getName() const override { return LogElement::name(); } static const char * getDefaultOrderBy() { return "event_date, event_time"; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c284acfa308..9b483bac25c 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -712,11 +712,18 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); auto logs = getContext()->getSystemLogs(); + std::vector> commands; commands.reserve(logs.size()); for (auto * system_log : logs) - commands.emplace_back([system_log] { system_log->flush(true); }); + { + auto current_index = system_log->getLastLogIndex(); + /// The data is started to being flushed in the background after notifyFlush call + system_log->notifyFlush(current_index); + commands.emplace_back([system_log, current_index] { system_log->flush(current_index); }); + } + /// The data is flashing in the background, we need to wait until it is done executeCommandsAndThrowIfError(commands); break; } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 572481e6b12..7042564799a 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -1,6 +1,7 @@ #include #include +#include "Common/SystemLogBase.h" #include #include #include @@ -462,33 +463,26 @@ void SystemLog::savingThreadFunction() { setThreadName("SystemLogFlush"); - std::vector to_flush; - bool exit_this_thread = false; - while (!exit_this_thread) + while (true) { try { - // The end index (exclusive, like std end()) of the messages we are - // going to flush. - uint64_t to_flush_end = 0; - // Should we prepare table even if there are no new messages. - bool should_prepare_tables_anyway = false; + auto result = queue->pop(); - to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread); - - if (to_flush.empty()) + if (result.is_shutdown) { - if (should_prepare_tables_anyway) - { - prepareTable(); - LOG_TRACE(log, "Table created (force)"); - - queue->confirm(to_flush_end); - } + LOG_TRACE(log, "Terminating"); + return; } - else + + if (!result.logs_elemets.empty()) { - flushImpl(to_flush, to_flush_end); + flushImpl(result.logs_elemets, result.logs_index); + } + else if (result.create_table_force) + { + prepareTable(); + queue->confirm(/* last_flashed_index */ 0); } } catch (...) @@ -496,7 +490,6 @@ void SystemLog::savingThreadFunction() tryLogCurrentException(__PRETTY_FUNCTION__); } } - LOG_TRACE(log, "Terminating"); } @@ -579,6 +572,9 @@ StoragePtr SystemLog::getStorage() const template void SystemLog::prepareTable() { + if (is_prepared) + return; + String description = table_id.getNameForLogs(); auto table = getStorage(); From aa42ccf0531aa416c5525b6d8c01c057f781b0e3 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 30 Jul 2024 19:04:22 +0200 Subject: [PATCH 031/132] move LOGICAL_IF_THEN to base/defines.h --- base/base/defines.h | 2 ++ src/Common/SystemLogBase.cpp | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index 5685a6d9833..7860cebd359 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -164,3 +164,5 @@ template constexpr void UNUSED(Args &&... args [[maybe_unused]]) // NOLINT(cppcoreguidelines-missing-std-forward) { } + +#define LOGICAL_IF_THEN(A, B) (!(A) || !!(B)) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 748cf4744ae..6840c461ce6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -27,7 +27,6 @@ #include #include -#define LOGICAL_IF_THEN(A, B) (!(A) || !!(B)) namespace DB { @@ -287,5 +286,3 @@ SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_QUEUE) } - -#undef LOGICAL_IF_THEN From abd5dfe1d0c7b88212a2f0dc4ed1a8b470dcedb1 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 30 Jul 2024 19:12:13 +0200 Subject: [PATCH 032/132] fix typo --- src/Common/SystemLogBase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 6c60ffafc4c..3915b99f8aa 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -58,7 +58,7 @@ public: virtual String getName() const = 0; - /// Return the index of the lastest added log element. That index no less than the flashed index. + /// Return the index of the latest added log element. That index no less than the flashed index. /// The flashed index is the index of the last log element which has been flushed successfully. /// Thereby all the records whose index is less than the flashed index are flushed already. virtual Index getLastLogIndex() = 0; From 8e5577ad8f8cb4e4d3ccb977af8536957088b8ca Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 30 Jul 2024 19:18:03 +0200 Subject: [PATCH 033/132] fix includes --- src/Interpreters/SystemLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 7042564799a..c236b524c60 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -1,7 +1,7 @@ #include #include -#include "Common/SystemLogBase.h" +#include #include #include #include From 86267418f9a74915d5089770d658b328684b9189 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 31 Jul 2024 23:28:03 +0200 Subject: [PATCH 034/132] fix tests, rework recreation tables conditions, add log about ignored logs --- base/base/defines.h | 2 - src/Backups/BackupsWorker.cpp | 2 + src/Common/SystemLogBase.cpp | 104 +++++++++--------- src/Common/SystemLogBase.h | 28 +++-- src/Interpreters/SystemLog.cpp | 9 +- .../test_system_flush_logs/test.py | 25 ++++- .../test_system_logs_recreate/test.py | 13 ++- 7 files changed, 105 insertions(+), 78 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index 7860cebd359..5685a6d9833 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -164,5 +164,3 @@ template constexpr void UNUSED(Args &&... args [[maybe_unused]]) // NOLINT(cppcoreguidelines-missing-std-forward) { } - -#define LOGICAL_IF_THEN(A, B) (!(A) || !!(B)) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 0b93ae6d547..363aaae9c8d 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -1171,6 +1171,8 @@ void BackupsWorker::waitAll() for (const auto & id : current_operations) wait(id, /* rethrow_exception= */ false); + backup_log->flush(backup_log->getLastLogIndex()); + LOG_INFO(log, "Backups and restores finished"); } diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 6840c461ce6..a35a46c49cc 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -88,31 +88,18 @@ void SystemLogQueue::push(LogElement&& element) // by one, under exclusive lock, so we will see each message count. // It is enough to only wake the flushing thread once, after the message // count increases past half available size. + const auto last_log_index = queue_front_index + queue.size(); - requested_flush_index = std::max(requested_flush_index, last_log_index); - flush_event.notify_all(); + notifyFlushUnlocked(last_log_index, /* should_prepare_tables_anyway */ false); } if (queue.size() >= settings.max_size_rows) { + chassert(queue.size() == settings.max_size_rows); + // Ignore all further entries until the queue is flushed. - // Log a message about that. Don't spam it -- this might be especially - // problematic in case of trace log. Remember what the front index of the - // queue was when we last logged the message. If it changed, it means the - // queue was flushed, and we can log again. - if (queue_front_index != logged_queue_full_at_index) - { - logged_queue_full_at_index = queue_front_index; - - // TextLog sets its logger level to 0, so this log is a noop and - // there is no recursive logging. - lock.unlock(); - LOG_ERROR(log, "Queue is full for system log '{}' at {}. max_size_rows {}", - demangle(typeid(*this).name()), - queue_front_index, - settings.max_size_rows); - } - + // To the next batch we add a log message about how much we have lost + ++ignored_logs; return; } @@ -133,15 +120,22 @@ void SystemLogQueue::handleCrash() } } +template +void SystemLogQueue::notifyFlushUnlocked(Index expected_flushed_index, bool should_prepare_tables_anyway) +{ + if (should_prepare_tables_anyway) + requested_prepare_tables = std::max(requested_prepare_tables, expected_flushed_index); + + requested_flush_index = std::max(requested_flush_index, expected_flushed_index); + + flush_event.notify_all(); +} + template void SystemLogQueue::notifyFlush(SystemLogQueue::Index expected_flushed_index, bool should_prepare_tables_anyway) { - std::unique_lock lock(mutex); - // Publish our flush request, taking care not to overwrite the requests - // made by other threads. - force_prepare_tables_requested |= should_prepare_tables_anyway; - requested_flush_index = std::max(requested_flush_index, expected_flushed_index); - flush_event.notify_all(); + std::lock_guard lock(mutex); + notifyFlushUnlocked(expected_flushed_index, should_prepare_tables_anyway); } template @@ -156,18 +150,15 @@ void SystemLogQueue::waitFlush(SystemLogQueue::Index exp std::unique_lock lock(mutex); - // there is no obligation to call notifyFlush before waitFlush, than we have to be sure that flush_event has been triggered - force_prepare_tables_requested |= should_prepare_tables_anyway; - if (requested_flush_index < expected_flushed_index) - { - requested_flush_index = expected_flushed_index; - flush_event.notify_all(); - } + // there is no obligation to call notifyFlush before waitFlush, than we have to be sure that flush_event has been triggered before we wait the result + notifyFlushUnlocked(expected_flushed_index, should_prepare_tables_anyway); auto result = confirm_event.wait_for(lock, std::chrono::seconds(timeout_seconds), [&] { - const bool if_should_prepare_then_it_is_done = LOGICAL_IF_THEN(should_prepare_tables_anyway, prepare_tables_done); - return (flushed_index >= expected_flushed_index && if_should_prepare_then_it_is_done) || is_shutdown; + if (should_prepare_tables_anyway) + return (flushed_index >= expected_flushed_index && prepared_tables >= requested_prepare_tables) || is_shutdown; + else + return (flushed_index >= expected_flushed_index) || is_shutdown; }); if (!result) @@ -194,7 +185,7 @@ template void SystemLogQueue::confirm(SystemLogQueue::Index last_flashed_index) { std::lock_guard lock(mutex); - prepare_tables_done = true; + prepared_tables = std::max(prepared_tables, last_flashed_index); flushed_index = std::max(flushed_index, last_flashed_index); confirm_event.notify_all(); } @@ -202,25 +193,34 @@ void SystemLogQueue::confirm(SystemLogQueue::Index last_ template typename SystemLogQueue::PopResult SystemLogQueue::pop() { - std::unique_lock lock(mutex); - - flush_event.wait_for(lock, std::chrono::milliseconds(settings.flush_interval_milliseconds), [&] () - { - const bool if_prepare_requested_and_it_is_not_done = force_prepare_tables_requested && !prepare_tables_done; - return requested_flush_index > flushed_index || if_prepare_requested_and_it_is_not_done || is_shutdown; - }); - - if (is_shutdown) - return PopResult{.is_shutdown = true}; - - queue_front_index += queue.size(); - PopResult result; - result.logs_index = queue_front_index; - result.logs_elemets.swap(queue); + size_t prev_ignored_logs = 0; - const bool if_prepare_requested_and_it_is_not_done = force_prepare_tables_requested && !prepare_tables_done; - result.create_table_force = if_prepare_requested_and_it_is_not_done; + { + std::unique_lock lock(mutex); + + flush_event.wait_for(lock, std::chrono::milliseconds(settings.flush_interval_milliseconds), [&] () + { + return requested_flush_index > flushed_index || requested_prepare_tables > prepared_tables || is_shutdown; + }); + + if (is_shutdown) + return PopResult{.is_shutdown = true}; + + queue_front_index += queue.size(); + prev_ignored_logs = ignored_logs; + ignored_logs = 0; + + result.last_log_index = queue_front_index; + result.logs.swap(queue); + result.create_table_force = requested_prepare_tables > prepared_tables; + } + + if (prev_ignored_logs) + LOG_ERROR(log, "Queue had been full at {}, accepted {} logs, ignored {} logs.", + result.last_log_index - result.logs.size(), + result.logs.size(), + prev_ignored_logs); return result; } diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 3915b99f8aa..c359287a73f 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -54,7 +55,7 @@ struct StorageID; class ISystemLog { public: - using Index = uint64_t; + using Index = int64_t; virtual String getName() const = 0; @@ -125,8 +126,8 @@ public: struct PopResult { - Index logs_index = 0; - std::vector logs_elemets = {}; + Index last_log_index = 0; + std::vector logs = {}; bool create_table_force = false; bool is_shutdown = false; }; @@ -136,6 +137,8 @@ public: void confirm(Index last_flashed_index); private: + void notifyFlushUnlocked(Index expected_flushed_index, bool should_prepare_tables_anyway); + /// Data shared between callers of add()/flush()/shutdown(), and the saving thread std::mutex mutex; @@ -150,18 +153,21 @@ private: // synchronous log flushing for SYSTEM FLUSH LOGS. Index queue_front_index = 0; - // A flag that says we must create the tables even if the queue is empty. - bool force_prepare_tables_requested = false; - bool prepare_tables_done = false; - // Requested to flush logs up to this index, exclusive - Index requested_flush_index = 0; - + Index requested_flush_index = std::numeric_limits::min(); // Flushed log up to this index, exclusive Index flushed_index = 0; - // Logged overflow message at this queue front index - Index logged_queue_full_at_index = -1; + // The same logic for the prepare tables: if requested_prepar_tables > prepared_tables we need to do prepare + // except that initial prepared_tables is -1 + // it is due to the difference: when no logs have been written and we call flush logs + // it becomes in the state: requested_flush_index = 0 and flushed_index = 0 -- we do not want to do anything + // but if we need to prepare tables it becomes requested_prepare_tables = 0 and prepared_tables = -1 + // we trigger background thread and do prepare + Index requested_prepare_tables = std::numeric_limits::min(); + Index prepared_tables = -1; + + size_t ignored_logs = 0; bool is_shutdown = false; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index c236b524c60..9d07184a0e5 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -475,14 +475,14 @@ void SystemLog::savingThreadFunction() return; } - if (!result.logs_elemets.empty()) + if (!result.logs.empty()) { - flushImpl(result.logs_elemets, result.logs_index); + flushImpl(result.logs, result.last_log_index); } else if (result.create_table_force) { prepareTable(); - queue->confirm(/* last_flashed_index */ 0); + queue->confirm(result.last_log_index); } } catch (...) @@ -572,9 +572,6 @@ StoragePtr SystemLog::getStorage() const template void SystemLog::prepareTable() { - if (is_prepared) - return; - String description = table_id.getNameForLogs(); auto table = getStorage(); diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 713b327eb76..0399122406a 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -4,7 +4,7 @@ import pytest from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry +from helpers.test_tools import assert_eq_with_retry, assert_logs_contain_with_retry, TSV cluster = ClickHouseCluster(__file__) node = cluster.add_instance( @@ -75,6 +75,8 @@ def test_system_suspend(): def test_log_max_size(start_cluster): + # we do misconfiguration here: buffer_size_rows_flush_threshold > max_size_rows, flush_interval_milliseconds is huge + # no auto flush by size not by time has a chance node.exec_in_container( [ "bash", @@ -83,6 +85,7 @@ def test_log_max_size(start_cluster): 1000000 + 1000000 10 10 @@ -91,11 +94,23 @@ def test_log_max_size(start_cluster): """, ] ) - node.restart_clickhouse() - for i in range(10): - node.query(f"select {i}") - assert node.query("select count() >= 10 from system.query_log") == "1\n" + node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") + node.restart_clickhouse() + + # all logs records above max_size_rows are lost + # The accepted logs records are never flushed until system flush logs is called by us + for i in range(21): + node.query(f"select {i}") + node.query("system flush logs") + + assert_logs_contain_with_retry( + node, "Queue had been full at 0, accepted 10 logs, ignored 34 logs." + ) + assert node.query( + "select count() >= 10, count() < 20 from system.query_log" + ) == TSV([[1, 1]]) + node.exec_in_container( ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] ) diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 1bdb1fe3261..1a4ed31278d 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -173,11 +173,20 @@ def test_drop_system_log(): node.query("system flush logs") node.query("select 2") node.query("system flush logs") - assert node.query("select count() > 0 from system.query_log") == "1\n" + assert node.query("select count() >= 2 from system.query_log") == "1\n" + node.query("drop table system.query_log sync") node.query("select 3") node.query("system flush logs") - assert node.query("select count() > 0 from system.query_log") == "1\n" + assert node.query("select count() >= 1 from system.query_log") == "1\n" + + node.query("drop table system.query_log sync") + node.restart_clickhouse() + node.query("system flush logs") + assert ( + node.query("select count() >= 0 from system.query_log") == "1\n" + ) # we check that query_log just exists + node.exec_in_container( ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] ) From 00efd5fc0da7124cb1671487250476185101a488 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 22:23:16 +0000 Subject: [PATCH 035/132] Automatic style fix --- tests/integration/test_system_logs_recreate/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 1a4ed31278d..d5347ae7dea 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -185,7 +185,7 @@ def test_drop_system_log(): node.query("system flush logs") assert ( node.query("select count() >= 0 from system.query_log") == "1\n" - ) # we check that query_log just exists + ) # we check that query_log just exists node.exec_in_container( ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] From e3290c782066aaf7d1891d865547706387bf773c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 1 Aug 2024 16:58:17 +0200 Subject: [PATCH 036/132] rework Context::getSystemLogs, add system logs flush at shutdown --- src/Backups/BackupsWorker.cpp | 4 +- src/Common/SystemLogBase.cpp | 8 +- src/Common/SystemLogBase.h | 8 +- src/Interpreters/Context.cpp | 7 +- src/Interpreters/Context.h | 4 +- src/Interpreters/InterpreterSystemQuery.cpp | 17 +--- src/Interpreters/SystemLog.cpp | 86 ++++++++-------- src/Interpreters/SystemLog.h | 104 +++++++++----------- 8 files changed, 109 insertions(+), 129 deletions(-) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 363aaae9c8d..106aa89082d 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -1171,7 +1171,7 @@ void BackupsWorker::waitAll() for (const auto & id : current_operations) wait(id, /* rethrow_exception= */ false); - backup_log->flush(backup_log->getLastLogIndex()); + backup_log->flush(backup_log->getLastLogIndex(), /* should_prepare_tables_anyway */ false); LOG_INFO(log, "Backups and restores finished"); } @@ -1223,6 +1223,8 @@ void BackupsWorker::cancelAll(bool wait_) for (const auto & id : current_operations) wait(id, /* rethrow_exception= */ false); + backup_log->flush(backup_log->getLastLogIndex(), /* should_prepare_tables_anyway */ false); + LOG_INFO(log, "Backups and restores finished or stopped"); } diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index a35a46c49cc..127c8862a35 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -249,15 +249,15 @@ SystemLogBase::Index SystemLogBase::getLastLogIndex() } template -void SystemLogBase::notifyFlush(Index expected_flushed_index) +void SystemLogBase::notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) { - queue->notifyFlush(expected_flushed_index, /* should_prepare_tables_anyway */ true); + queue->notifyFlush(expected_flushed_index, should_prepare_tables_anyway); } template -void SystemLogBase::flush(Index expected_flushed_index) +void SystemLogBase::flush(Index expected_flushed_index, bool should_prepare_tables_anyway) { - queue->waitFlush(expected_flushed_index, /* should_prepare_tables_anyway */ true); + queue->waitFlush(expected_flushed_index, should_prepare_tables_anyway); } template diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index c359287a73f..0d7b04d5c57 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -64,9 +64,9 @@ public: /// Thereby all the records whose index is less than the flashed index are flushed already. virtual Index getLastLogIndex() = 0; /// Call this method to wake up the flush thread and flush the data in the background. It is non blocking call - virtual void notifyFlush(Index expected_flushed_index) = 0; + virtual void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) = 0; /// Call this method to wait intill the logs are flushed up to expected_flushed_index. It is blocking call. - virtual void flush(Index expected_flushed_index) = 0; + virtual void flush(Index expected_flushed_index, bool should_prepare_tables_anyway) = 0; virtual void prepareTable() = 0; @@ -198,10 +198,10 @@ public: Index getLastLogIndex() override; - void notifyFlush(Index expected_flushed_index) override; + void notifyFlush(Index expected_flushed_index, bool should_prepare_tables_anyway) override; /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. - void flush(Index expected_flushed_index) override; + void flush(Index expected_flushed_index, bool should_prepare_tables_anyway) override; /// Handles crash, flushes log without blocking if notify_flush_on_crash is set void handleCrash() override; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5413b568068..3051ed3e567 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -618,7 +619,7 @@ struct ContextSharedPart : boost::noncopyable /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. * Note that part changes at shutdown won't be logged to part log. */ - SHUTDOWN(log, "system logs", system_logs, shutdown()); + SHUTDOWN(log, "system logs", system_logs, flushAndShutdown()); LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); @@ -4312,13 +4313,13 @@ std::shared_ptr Context::getBlobStorageLog() const return shared->system_logs->blob_storage_log; } -std::vector Context::getSystemLogs() const +SystemLogs Context::getSystemLogs() const { SharedLockGuard lock(shared->mutex); if (!shared->system_logs) return {}; - return shared->system_logs->logs; + return *shared->system_logs; } std::optional Context::getDashboards() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d5e35c3e4b3..3da4f124553 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -48,6 +48,8 @@ namespace DB class ASTSelectQuery; +class SystemLogs; + struct ContextSharedPart; class ContextAccess; class ContextAccessWrapper; @@ -1150,7 +1152,7 @@ public: std::shared_ptr getBackupLog() const; std::shared_ptr getBlobStorageLog() const; - std::vector getSystemLogs() const; + SystemLogs getSystemLogs() const; using Dashboards = std::vector>; std::optional getDashboards() const; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9b483bac25c..ef6d1040c5e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -710,21 +710,8 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); - - auto logs = getContext()->getSystemLogs(); - - std::vector> commands; - commands.reserve(logs.size()); - for (auto * system_log : logs) - { - auto current_index = system_log->getLastLogIndex(); - /// The data is started to being flushed in the background after notifyFlush call - system_log->notifyFlush(current_index); - commands.emplace_back([system_log, current_index] { system_log->flush(current_index); }); - } - - /// The data is flashing in the background, we need to wait until it is done - executeCommandsAndThrowIfError(commands); + auto system_logs = getContext()->getSystemLogs(); + system_logs.flush(true); break; } case Type::STOP_LISTEN: diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 9d07184a0e5..9b58da3f545 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -50,6 +50,7 @@ #include + namespace DB { @@ -312,56 +313,13 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf azure_queue_log = createSystemLog(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information files processes by S3Queue engine."); blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); - if (query_log) - logs.emplace_back(query_log.get()); - if (query_thread_log) - logs.emplace_back(query_thread_log.get()); - if (part_log) - logs.emplace_back(part_log.get()); - if (trace_log) - logs.emplace_back(trace_log.get()); - if (crash_log) - logs.emplace_back(crash_log.get()); - if (text_log) - logs.emplace_back(text_log.get()); - if (metric_log) - logs.emplace_back(metric_log.get()); - if (error_log) - logs.emplace_back(error_log.get()); - if (asynchronous_metric_log) - logs.emplace_back(asynchronous_metric_log.get()); - if (opentelemetry_span_log) - logs.emplace_back(opentelemetry_span_log.get()); - if (query_views_log) - logs.emplace_back(query_views_log.get()); - if (zookeeper_log) - logs.emplace_back(zookeeper_log.get()); if (session_log) - { - logs.emplace_back(session_log.get()); global_context->addWarningMessage("Table system.session_log is enabled. It's unreliable and may contain garbage. Do not use it for any kind of security monitoring."); - } - if (transactions_info_log) - logs.emplace_back(transactions_info_log.get()); - if (processors_profile_log) - logs.emplace_back(processors_profile_log.get()); - if (filesystem_cache_log) - logs.emplace_back(filesystem_cache_log.get()); - if (filesystem_read_prefetches_log) - logs.emplace_back(filesystem_read_prefetches_log.get()); - if (asynchronous_insert_log) - logs.emplace_back(asynchronous_insert_log.get()); - if (backup_log) - logs.emplace_back(backup_log.get()); - if (s3_queue_log) - logs.emplace_back(s3_queue_log.get()); - if (blob_storage_log) - logs.emplace_back(blob_storage_log.get()); bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { - for (auto & log : logs) + for (auto & log : getAllLogs()) { log->startup(); if (should_prepare) @@ -395,20 +353,56 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf } } - -SystemLogs::~SystemLogs() +std::vector SystemLogs::getAllLogs() const { +/// NOLINTBEGIN(bugprone-macro-parentheses) +#define GET_RAW_POINTERS(log_type, member, descr) \ + member.get(), \ + + std::vector result = { + LIST_OF_ALL_SYSTEM_LOGS(GET_RAW_POINTERS) + }; +#undef GET_RAW_POINTERS +/// NOLINTEND(bugprone-macro-parentheses) + + auto last_it = std::remove(result.begin(), result.end(), nullptr); + result.erase(last_it, result.end()); + + return result; +} + +void SystemLogs::flush(bool should_prepare_tables_anyway) +{ + auto logs = getAllLogs(); + std::vector logs_indexes(logs.size(), 0); + + for (size_t i = 0; i < logs.size(); ++i) + { + auto last_log_index = logs[i]->getLastLogIndex(); + logs_indexes[i] = last_log_index; + logs[i]->notifyFlush(last_log_index, should_prepare_tables_anyway); + } + + for (size_t i = 0; i < logs.size(); ++i) + logs[i]->flush(logs_indexes[i], should_prepare_tables_anyway); +} + +void SystemLogs::flushAndShutdown() +{ + flush(/* should_prepare_tables_anyway */ false); shutdown(); } void SystemLogs::shutdown() { + auto logs = getAllLogs(); for (auto & log : logs) log->shutdown(); } void SystemLogs::handleCrash() { + auto logs = getAllLogs(); for (auto & log : logs) log->handleCrash(); } diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 0ac468b15ec..093be203282 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -5,6 +5,32 @@ #include #include +#include + +#define LIST_OF_ALL_SYSTEM_LOGS(M) \ + M(QueryLog, query_log, "Used to log queries.") \ + M(QueryThreadLog, query_thread_log, "Used to log query threads.") \ + M(PartLog, part_log, "Used to log operations with parts.") \ + M(TraceLog, trace_log, "Used to log traces from query profiler.") \ + M(CrashLog, crash_log, "Used to log server crashes.") \ + M(TextLog, text_log, "Used to log all text messages.") \ + M(MetricLog, metric_log, "Used to log all metrics.") \ + M(ErrorLog, error_log, "Used to log errors.") \ + M(FilesystemCacheLog, filesystem_cache_log, "") \ + M(FilesystemReadPrefetchesLog, filesystem_read_prefetches_log, "") \ + M(ObjectStorageQueueLog, s3_queue_log, "") \ + M(ObjectStorageQueueLog, azure_queue_log, "") \ + M(AsynchronousMetricLog, asynchronous_metric_log, "Metrics from system.asynchronous_metrics") \ + M(OpenTelemetrySpanLog, opentelemetry_span_log, "OpenTelemetry trace spans.") \ + M(QueryViewsLog, query_views_log, "Used to log queries of materialized and live views.") \ + M(ZooKeeperLog, zookeeper_log, "Used to log all actions of ZooKeeper client.") \ + M(SessionLog, session_log, "Login, LogOut and Login failure events.") \ + M(TransactionsInfoLog, transactions_info_log, "Events related to transactions.") \ + M(ProcessorsProfileLog, processors_profile_log, "Used to log processors profiling") \ + M(AsynchronousInsertLog, asynchronous_insert_log, "") \ + M(BackupLog, backup_log, "Backup and restore events") \ + M(BlobStorageLog, blob_storage_log, "Log blob storage operations") \ + namespace DB { @@ -34,71 +60,39 @@ namespace DB }; */ -class QueryLog; -class QueryThreadLog; -class PartLog; -class TextLog; -class TraceLog; -class CrashLog; -class ErrorLog; -class MetricLog; -class AsynchronousMetricLog; -class OpenTelemetrySpanLog; -class QueryViewsLog; -class ZooKeeperLog; -class SessionLog; -class TransactionsInfoLog; -class ProcessorsProfileLog; -class FilesystemCacheLog; -class FilesystemReadPrefetchesLog; -class AsynchronousInsertLog; -class BackupLog; -class ObjectStorageQueueLog; -class BlobStorageLog; +/// NOLINTBEGIN(bugprone-macro-parentheses) +#define FORWARD_DECLARATION(log_type, member, descr) \ + class log_type; \ + +LIST_OF_ALL_SYSTEM_LOGS(FORWARD_DECLARATION) +#undef FORWARD_DECLARATION +/// NOLINTEND(bugprone-macro-parentheses) + /// System logs should be destroyed in destructor of the last Context and before tables, /// because SystemLog destruction makes insert query while flushing data into underlying tables -struct SystemLogs +class SystemLogs { +public: + SystemLogs() = default; SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config); - ~SystemLogs(); + SystemLogs(const SystemLogs & other) = default; + void flush(bool should_prepare_tables_anyway); + void flushAndShutdown(); void shutdown(); void handleCrash(); - std::shared_ptr query_log; /// Used to log queries. - std::shared_ptr query_thread_log; /// Used to log query threads. - std::shared_ptr part_log; /// Used to log operations with parts - std::shared_ptr trace_log; /// Used to log traces from query profiler - std::shared_ptr crash_log; /// Used to log server crashes. - std::shared_ptr text_log; /// Used to log all text messages. - std::shared_ptr metric_log; /// Used to log all metrics. - std::shared_ptr error_log; /// Used to log errors. - std::shared_ptr filesystem_cache_log; - std::shared_ptr filesystem_read_prefetches_log; - std::shared_ptr s3_queue_log; - std::shared_ptr azure_queue_log; - /// Metrics from system.asynchronous_metrics. - std::shared_ptr asynchronous_metric_log; - /// OpenTelemetry trace spans. - std::shared_ptr opentelemetry_span_log; - /// Used to log queries of materialized and live views - std::shared_ptr query_views_log; - /// Used to log all actions of ZooKeeper client - std::shared_ptr zookeeper_log; - /// Login, LogOut and Login failure events - std::shared_ptr session_log; - /// Events related to transactions - std::shared_ptr transactions_info_log; - /// Used to log processors profiling - std::shared_ptr processors_profile_log; - std::shared_ptr asynchronous_insert_log; - /// Backup and restore events - std::shared_ptr backup_log; - /// Log blob storage operations - std::shared_ptr blob_storage_log; +/// NOLINTBEGIN(bugprone-macro-parentheses) +#define PUBLIC_MEMBERS(log_type, member, descr) \ + std::shared_ptr member; \ - std::vector logs; + LIST_OF_ALL_SYSTEM_LOGS(PUBLIC_MEMBERS) +#undef PUBLIC_MEMBERS +/// NOLINTEND(bugprone-macro-parentheses) + +private: + std::vector getAllLogs() const; }; struct SystemLogSettings From 633f700df60b878e59fe17d6204faf4e984b1009 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 2 Aug 2024 12:14:58 +0200 Subject: [PATCH 037/132] adjust tests --- .../test_system_flush_logs/test.py | 63 +++-- .../test_system_logs_recreate/test.py | 225 +++++++++--------- 2 files changed, 146 insertions(+), 142 deletions(-) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 0399122406a..44269883d1b 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -12,18 +12,6 @@ node = cluster.add_instance( stay_alive=True, ) -system_logs = [ - # enabled by default - ("system.text_log", 1), - ("system.query_log", 1), - ("system.query_thread_log", 1), - ("system.part_log", 1), - ("system.trace_log", 1), - ("system.metric_log", 1), - ("system.error_log", 1), -] - - @pytest.fixture(scope="module", autouse=True) def start_cluster(): try: @@ -33,22 +21,29 @@ def start_cluster(): cluster.shutdown() -@pytest.fixture(scope="function") -def flush_logs(): +def test_system_logs_exists(): + system_logs = [ + # disabled by default + ("system.text_log", 0), + ("system.query_log", 1), + ("system.query_thread_log", 1), + ("system.part_log", 1), + ("system.trace_log", 1), + ("system.metric_log", 1), + ("system.error_log", 1), + ] + node.query("SYSTEM FLUSH LOGS") - - -@pytest.mark.parametrize("table,exists", system_logs) -def test_system_logs(flush_logs, table, exists): - q = "SELECT * FROM {}".format(table) - if exists: - node.query(q) - else: - response = node.query_and_get_error(q) - assert ( - "Table {} does not exist".format(table) in response - or "Unknown table expression identifier '{}'".format(table) in response - ) + for table, exists in system_logs: + q = "SELECT * FROM {}".format(table) + if exists: + node.query(q) + else: + response = node.query_and_get_error(q) + assert ( + "Table {} does not exist".format(table) in response + or "Unknown table expression identifier '{}'".format(table) in response + ) # Logic is tricky, let's check that there is no hang in case of message queue @@ -67,11 +62,14 @@ def test_system_logs_non_empty_queue(): def test_system_suspend(): - node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;") - node.query("INSERT INTO t VALUES (now());") - node.query("SYSTEM SUSPEND FOR 1 SECOND;") - node.query("INSERT INTO t VALUES (now());") - assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;") + try: + node.query("CREATE TABLE t (x DateTime) ENGINE=Memory;") + node.query("INSERT INTO t VALUES (now());") + node.query("SYSTEM SUSPEND FOR 1 SECOND;") + node.query("INSERT INTO t VALUES (now());") + assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;") + finally: + node.query("DROP TABLE IF EXISTS t;") def test_log_max_size(start_cluster): @@ -95,6 +93,7 @@ def test_log_max_size(start_cluster): ] ) + node.query("SYSTEM FLUSH LOGS") node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") node.restart_clickhouse() diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index d5347ae7dea..c6d5861904c 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -33,124 +33,129 @@ def test_system_logs_recreate(): "error_log", ] - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + try: + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") + assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 1 ) - == 1 - ) - # NOTE: we use zzz- prefix to make it the last file, - # so that it will be applied last. - for table in system_logs: - node.exec_in_container( - [ - "bash", - "-c", - f"""echo " - - <{table}> - ENGINE = Null - - - - " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml - """, - ] - ) - - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" not in node.query( - f"SHOW CREATE TABLE system.{table}" - ) - assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + # NOTE: we use zzz- prefix to make it the last file, + # so that it will be applied last. + for table in system_logs: + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + <{table}> + ENGINE = Null + + + + " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml + """, + ] ) - == 2 - ) - # apply only storage_policy for all system tables - for table in system_logs: - node.exec_in_container( - [ - "bash", - "-c", - f"""echo " - - <{table}> - system_tables - - - " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml - """, - ] - ) - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - import logging - - for table in system_logs: - create_table_sql = node.query(f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw") - logging.debug( - "With storage policy, SHOW CREATE TABLE system.%s is: %s", - table, - create_table_sql, - ) - assert "ENGINE = MergeTree" in create_table_sql - assert "ENGINE = Null" not in create_table_sql - assert "SETTINGS storage_policy = 'system_tables'" in create_table_sql - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + node.restart_clickhouse() + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" not in node.query( + f"SHOW CREATE TABLE system.{table}" ) - == 3 - ) - - for table in system_logs: - node.exec_in_container( - ["rm", f"/etc/clickhouse-server/config.d/zzz-override-{table}.xml"] - ) - - node.restart_clickhouse() - node.query("SYSTEM FLUSH LOGS") - for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + assert "ENGINE = Null" in node.query(f"SHOW CREATE TABLE system.{table}") + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 2 ) - == 4 - ) - node.query("SYSTEM FLUSH LOGS") - # Ensure that there was no superfluous RENAME's - # IOW that the table created only when the structure is indeed different. - for table in system_logs: - assert ( - len( - node.query(f"SHOW TABLES FROM system LIKE '{table}%'") - .strip() - .split("\n") + # apply only storage_policy for all system tables + for table in system_logs: + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + <{table}> + system_tables + + + " > /etc/clickhouse-server/config.d/zzz-override-{table}.xml + """, + ] ) - == 4 - ) + node.restart_clickhouse() + node.query("SYSTEM FLUSH LOGS") + import logging + + for table in system_logs: + create_table_sql = node.query(f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw") + logging.debug( + "With storage policy, SHOW CREATE TABLE system.%s is: %s", + table, + create_table_sql, + ) + assert "ENGINE = MergeTree" in create_table_sql + assert "ENGINE = Null" not in create_table_sql + assert "SETTINGS storage_policy = 'system_tables'" in create_table_sql + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 3 + ) + + for table in system_logs: + node.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/zzz-override-{table}.xml"] + ) + + node.restart_clickhouse() + node.query("SYSTEM FLUSH LOGS") + for table in system_logs: + assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") + assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 4 + ) + + node.query("SYSTEM FLUSH LOGS") + # Ensure that there was no superfluous RENAME's + # IOW that the table created only when the structure is indeed different. + for table in system_logs: + assert ( + len( + node.query(f"SHOW TABLES FROM system LIKE '{table}%'") + .strip() + .split("\n") + ) + == 4 + ) + finally: + for table in system_logs: + for syffix in range(3): + node.query(f"DROP TABLE IF EXISTS system.{table}_{syffix} sync") def test_drop_system_log(): From 08f8d94856841254957e1e746685d32de2074dc6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 2 Aug 2024 12:16:44 +0200 Subject: [PATCH 038/132] no flush backup logs at shutdown, flush all logs --- src/Backups/BackupsWorker.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 106aa89082d..0b93ae6d547 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -1171,8 +1171,6 @@ void BackupsWorker::waitAll() for (const auto & id : current_operations) wait(id, /* rethrow_exception= */ false); - backup_log->flush(backup_log->getLastLogIndex(), /* should_prepare_tables_anyway */ false); - LOG_INFO(log, "Backups and restores finished"); } @@ -1223,8 +1221,6 @@ void BackupsWorker::cancelAll(bool wait_) for (const auto & id : current_operations) wait(id, /* rethrow_exception= */ false); - backup_log->flush(backup_log->getLastLogIndex(), /* should_prepare_tables_anyway */ false); - LOG_INFO(log, "Backups and restores finished or stopped"); } From b416764585e7e03382f66c99636b4bf0b51bd79f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 7 Aug 2024 13:02:38 +0200 Subject: [PATCH 039/132] put description of system log table in one place --- src/Interpreters/SystemLog.cpp | 35 ++++------------------ src/Interpreters/SystemLog.h | 54 ++++++++++++++++------------------ 2 files changed, 30 insertions(+), 59 deletions(-) diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 9b58da3f545..d4403b72583 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -284,34 +284,11 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config) { - query_log = createSystemLog(global_context, "system", "query_log", config, "query_log", "Contains information about executed queries, for example, start time, duration of processing, error messages."); - query_thread_log = createSystemLog(global_context, "system", "query_thread_log", config, "query_thread_log", "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing."); - part_log = createSystemLog(global_context, "system", "part_log", config, "part_log", "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data."); - trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log", "Contains stack traces collected by the sampling query profiler."); - crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur."); - text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); - metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk."); - error_log = createSystemLog(global_context, "system", "error_log", config, "error_log", "Contains history of error values from table system.errors, periodically flushed to disk."); - filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem."); - filesystem_read_prefetches_log = createSystemLog( - global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); - asynchronous_metric_log = createSystemLog( - global_context, "system", "asynchronous_metric_log", config, - "asynchronous_metric_log", "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default)."); - opentelemetry_span_log = createSystemLog( - global_context, "system", "opentelemetry_span_log", config, - "opentelemetry_span_log", "Contains information about trace spans for executed queries."); - query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log", "Contains information about the dependent views executed when running a query, for example, the view type or the execution time."); - zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log", "This table contains information about the parameters of the request to the ZooKeeper server and the response from it."); - session_log = createSystemLog(global_context, "system", "session_log", config, "session_log", "Contains information about all successful and failed login and logout events."); - transactions_info_log = createSystemLog( - global_context, "system", "transactions_info_log", config, "transactions_info_log", "Contains information about all transactions executed on a current server."); - processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log", "Contains profiling information on processors level (building blocks for a pipeline for query execution."); - asynchronous_insert_log = createSystemLog(global_context, "system", "asynchronous_insert_log", config, "asynchronous_insert_log", "Contains a history for all asynchronous inserts executed on current server."); - backup_log = createSystemLog(global_context, "system", "backup_log", config, "backup_log", "Contains logging entries with the information about BACKUP and RESTORE operations."); - s3_queue_log = createSystemLog(global_context, "system", "s3queue_log", config, "s3queue_log", "Contains logging entries with the information files processes by S3Queue engine."); - azure_queue_log = createSystemLog(global_context, "system", "azure_queue_log", config, "azure_queue_log", "Contains logging entries with the information files processes by S3Queue engine."); - blob_storage_log = createSystemLog(global_context, "system", "blob_storage_log", config, "blob_storage_log", "Contains logging entries with information about various blob storage operations such as uploads and deletes."); +#define CREATE_PUBLIC_MEMBERS(log_type, member, descr) \ + member = createSystemLog(global_context, "system", #member, config, #member, descr); \ + + LIST_OF_ALL_SYSTEM_LOGS(CREATE_PUBLIC_MEMBERS) +#undef CREATE_PUBLIC_MEMBERS if (session_log) global_context->addWarningMessage("Table system.session_log is enabled. It's unreliable and may contain garbage. Do not use it for any kind of security monitoring."); @@ -355,7 +332,6 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf std::vector SystemLogs::getAllLogs() const { -/// NOLINTBEGIN(bugprone-macro-parentheses) #define GET_RAW_POINTERS(log_type, member, descr) \ member.get(), \ @@ -363,7 +339,6 @@ std::vector SystemLogs::getAllLogs() const LIST_OF_ALL_SYSTEM_LOGS(GET_RAW_POINTERS) }; #undef GET_RAW_POINTERS -/// NOLINTEND(bugprone-macro-parentheses) auto last_it = std::remove(result.begin(), result.end(), nullptr); result.erase(last_it, result.end()); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 093be203282..6682829c0c6 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -8,28 +8,28 @@ #include #define LIST_OF_ALL_SYSTEM_LOGS(M) \ - M(QueryLog, query_log, "Used to log queries.") \ - M(QueryThreadLog, query_thread_log, "Used to log query threads.") \ - M(PartLog, part_log, "Used to log operations with parts.") \ - M(TraceLog, trace_log, "Used to log traces from query profiler.") \ - M(CrashLog, crash_log, "Used to log server crashes.") \ - M(TextLog, text_log, "Used to log all text messages.") \ - M(MetricLog, metric_log, "Used to log all metrics.") \ - M(ErrorLog, error_log, "Used to log errors.") \ - M(FilesystemCacheLog, filesystem_cache_log, "") \ - M(FilesystemReadPrefetchesLog, filesystem_read_prefetches_log, "") \ - M(ObjectStorageQueueLog, s3_queue_log, "") \ - M(ObjectStorageQueueLog, azure_queue_log, "") \ - M(AsynchronousMetricLog, asynchronous_metric_log, "Metrics from system.asynchronous_metrics") \ - M(OpenTelemetrySpanLog, opentelemetry_span_log, "OpenTelemetry trace spans.") \ - M(QueryViewsLog, query_views_log, "Used to log queries of materialized and live views.") \ - M(ZooKeeperLog, zookeeper_log, "Used to log all actions of ZooKeeper client.") \ - M(SessionLog, session_log, "Login, LogOut and Login failure events.") \ - M(TransactionsInfoLog, transactions_info_log, "Events related to transactions.") \ - M(ProcessorsProfileLog, processors_profile_log, "Used to log processors profiling") \ - M(AsynchronousInsertLog, asynchronous_insert_log, "") \ - M(BackupLog, backup_log, "Backup and restore events") \ - M(BlobStorageLog, blob_storage_log, "Log blob storage operations") \ + M(QueryLog, query_log, "Contains information about executed queries, for example, start time, duration of processing, error messages.") \ + M(QueryThreadLog, query_thread_log, "Contains information about threads that execute queries, for example, thread name, thread start time, duration of query processing.") \ + M(PartLog, part_log, "This table contains information about events that occurred with data parts in the MergeTree family tables, such as adding or merging data.") \ + M(TraceLog, trace_log, "Contains stack traces collected by the sampling query profiler.") \ + M(CrashLog, crash_log, "Contains information about stack traces for fatal errors. The table does not exist in the database by default, it is created only when fatal errors occur.") \ + M(TextLog, text_log, "Contains logging entries which are normally written to a log file or to stdout.") \ + M(MetricLog, metric_log, "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk.") \ + M(ErrorLog, error_log, "Contains history of error values from table system.errors, periodically flushed to disk.") \ + M(FilesystemCacheLog, filesystem_cache_log, "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem.") \ + M(FilesystemReadPrefetchesLog, filesystem_read_prefetches_log, "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem.") \ + M(ObjectStorageQueueLog, s3_queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ + M(ObjectStorageQueueLog, azure_queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ + M(AsynchronousMetricLog, asynchronous_metric_log, "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default).") \ + M(OpenTelemetrySpanLog, opentelemetry_span_log, "Contains information about trace spans for executed queries.") \ + M(QueryViewsLog, query_views_log, "Contains information about the dependent views executed when running a query, for example, the view type or the execution time.") \ + M(ZooKeeperLog, zookeeper_log, "This table contains information about the parameters of the request to the ZooKeeper server and the response from it.") \ + M(SessionLog, session_log, "Contains information about all successful and failed login and logout events.") \ + M(TransactionsInfoLog, transactions_info_log, "Contains information about all transactions executed on a current server.") \ + M(ProcessorsProfileLog, processors_profile_log, "Contains profiling information on processors level (building blocks for a pipeline for query execution.") \ + M(AsynchronousInsertLog, asynchronous_insert_log, "Contains a history for all asynchronous inserts executed on current server.") \ + M(BackupLog, backup_log, "Contains logging entries with the information about BACKUP and RESTORE operations.") \ + M(BlobStorageLog, blob_storage_log, "Contains logging entries with information about various blob storage operations such as uploads and deletes.") \ namespace DB @@ -60,13 +60,11 @@ namespace DB }; */ -/// NOLINTBEGIN(bugprone-macro-parentheses) #define FORWARD_DECLARATION(log_type, member, descr) \ class log_type; \ LIST_OF_ALL_SYSTEM_LOGS(FORWARD_DECLARATION) #undef FORWARD_DECLARATION -/// NOLINTEND(bugprone-macro-parentheses) /// System logs should be destroyed in destructor of the last Context and before tables, @@ -83,13 +81,11 @@ public: void shutdown(); void handleCrash(); -/// NOLINTBEGIN(bugprone-macro-parentheses) -#define PUBLIC_MEMBERS(log_type, member, descr) \ +#define DECLARE_PUBLIC_MEMBERS(log_type, member, descr) \ std::shared_ptr member; \ - LIST_OF_ALL_SYSTEM_LOGS(PUBLIC_MEMBERS) -#undef PUBLIC_MEMBERS -/// NOLINTEND(bugprone-macro-parentheses) + LIST_OF_ALL_SYSTEM_LOGS(DECLARE_PUBLIC_MEMBERS) +#undef DECLARE_PUBLIC_MEMBERS private: std::vector getAllLogs() const; From b9f564f6f4fa98cc385acd6ce3c8e9f736bc55e0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 7 Aug 2024 11:12:11 +0000 Subject: [PATCH 040/132] Automatic style fix --- .../test_system_flush_logs/test.py | 1 + .../test_system_logs_recreate/test.py | 20 ++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index 44269883d1b..dd48ef055f5 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -12,6 +12,7 @@ node = cluster.add_instance( stay_alive=True, ) + @pytest.fixture(scope="module", autouse=True) def start_cluster(): try: diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index c6d5861904c..8b84734ed02 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -36,8 +36,12 @@ def test_system_logs_recreate(): try: node.query("SYSTEM FLUSH LOGS") for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") + assert "ENGINE = MergeTree" in node.query( + f"SHOW CREATE TABLE system.{table}" + ) + assert "ENGINE = Null" not in node.query( + f"SHOW CREATE TABLE system.{table}" + ) assert ( len( node.query(f"SHOW TABLES FROM system LIKE '{table}%'") @@ -103,7 +107,9 @@ def test_system_logs_recreate(): import logging for table in system_logs: - create_table_sql = node.query(f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw") + create_table_sql = node.query( + f"SHOW CREATE TABLE system.{table} FORMAT TSVRaw" + ) logging.debug( "With storage policy, SHOW CREATE TABLE system.%s is: %s", table, @@ -129,8 +135,12 @@ def test_system_logs_recreate(): node.restart_clickhouse() node.query("SYSTEM FLUSH LOGS") for table in system_logs: - assert "ENGINE = MergeTree" in node.query(f"SHOW CREATE TABLE system.{table}") - assert "ENGINE = Null" not in node.query(f"SHOW CREATE TABLE system.{table}") + assert "ENGINE = MergeTree" in node.query( + f"SHOW CREATE TABLE system.{table}" + ) + assert "ENGINE = Null" not in node.query( + f"SHOW CREATE TABLE system.{table}" + ) assert ( len( node.query(f"SHOW TABLES FROM system LIKE '{table}%'") From ddc058aa6ff6780fa67bc5c59d9d7ff9a71d4ee1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 7 Aug 2024 12:51:06 +0200 Subject: [PATCH 041/132] Update minio in stateless tests --- docker/test/stateless/Dockerfile | 4 ++-- docker/test/stateless/setup_minio.sh | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index a0e5513a3a2..d8eb072328f 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -69,8 +69,8 @@ ENV MAX_RUN_TIME=0 # Unrelated to vars in setup_minio.sh, but should be the same there # to have the same binaries for local running scenario -ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z -ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z +ARG MINIO_SERVER_VERSION=2024-08-03T04-33-23Z +ARG MINIO_CLIENT_VERSION=2024-07-31T15-58-33Z ARG TARGETARCH # Download Minio-related binaries diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index 2b9433edd20..d8310d072b8 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -59,8 +59,8 @@ find_os() { download_minio() { local os local arch - local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z} - local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z} + local minio_server_version=${MINIO_SERVER_VERSION:-2024-08-03T04-33-23Z} + local minio_client_version=${MINIO_CLIENT_VERSION:-2024-07-31T15-58-33Z} os=$(find_os) arch=$(find_arch) @@ -82,10 +82,10 @@ setup_minio() { local test_type=$1 ./mc alias set clickminio http://localhost:11111 clickhouse clickhouse ./mc admin user add clickminio test testtest - ./mc admin policy set clickminio readwrite user=test + ./mc admin policy attach clickminio readwrite --user=test ./mc mb --ignore-existing clickminio/test if [ "$test_type" = "stateless" ]; then - ./mc policy set public clickminio/test + ./mc anonymous set public clickminio/test fi } @@ -148,4 +148,4 @@ main() { setup_aws_credentials } -main "$@" \ No newline at end of file +main "$@" From 6afff5824e3bd3e4eca2c50d7cce10dda5678433 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 7 Aug 2024 12:14:05 +0000 Subject: [PATCH 042/132] Fix 03130_convert_outer_join_to_inner_join --- .../03130_convert_outer_join_to_inner_join.sql | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql b/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql index 28362f1f469..168177a06d7 100644 --- a/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql +++ b/tests/queries/0_stateless/03130_convert_outer_join_to_inner_join.sql @@ -6,14 +6,18 @@ CREATE TABLE test_table_1 ( id UInt64, value String -) ENGINE=MergeTree ORDER BY id; +) ENGINE=MergeTree ORDER BY id +SETTINGS index_granularity = 16 # We have number of granules in the `EXPLAIN` output in reference file +; DROP TABLE IF EXISTS test_table_2; CREATE TABLE test_table_2 ( id UInt64, value String -) ENGINE=MergeTree ORDER BY id; +) ENGINE=MergeTree ORDER BY id +SETTINGS index_granularity = 16 +; INSERT INTO test_table_1 VALUES (1, 'Value_1'), (2, 'Value_2'); INSERT INTO test_table_2 VALUES (2, 'Value_2'), (3, 'Value_3'); From 28c8d158635ef2564a60c25a6f971aa324030dcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 7 Aug 2024 13:37:56 +0000 Subject: [PATCH 043/132] Fix style --- tests/queries/0_stateless/03217_filtering_in_system_tables.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql index bbc755e478d..72ca7c8684d 100644 --- a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -16,10 +16,12 @@ SYSTEM FLUSH LOGS; -- StorageSystemTables SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND current_database = currentDatabase() AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';' AND type = 'QueryFinish'; -- StorageSystemReplicas SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND current_database = currentDatabase() AND query LIKE '%SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name = \'r1\';' AND type = 'QueryFinish'; From d2e9833dba4038999e7aeb540ea6b56cc1c59449 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 7 Aug 2024 17:00:41 +0200 Subject: [PATCH 044/132] Add minio audit logs --- docker/test/stateless/run.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ea32df23af0..b33c261dacc 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -176,6 +176,25 @@ done setup_logs_replication attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 +# create minio log webhooks for both audit and server logs +clickhouse-client --query "CREATE TABLE minio_audit_logs +( + log String, + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(substring(JSONExtractRaw(log, 'time'), 2, 29), 9, 'UTC') +) +ENGINE = MergeTree +ORDER BY tuple()" +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" + +clickhouse-client --query "CREATE TABLE minio_server_logs +( + log String, + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(substring(JSONExtractRaw(log, 'time'), 2, 29), 9, 'UTC') +) +ENGINE = MergeTree +ORDER BY tuple()" +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" + function fn_exists() { declare -F "$1" > /dev/null; } @@ -328,6 +347,11 @@ do fi done + +# collect minio audit and server logs +clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" + # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? # Because it's the simplest way to read it when server has crashed. From ad678cb5a8d4533a88273b8244dc0844c83e641c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Aug 2024 18:24:03 +0200 Subject: [PATCH 045/132] Ignore disappeared projections on start --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 33 +++++++++++- .../MergeTree/MergeTreeDataPartChecksum.cpp | 6 --- .../MergeTree/MergeTreeDataPartChecksum.h | 3 -- .../test_broken_projections/test.py | 50 +++++++++++++++++++ 4 files changed, 81 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3a44359b537..918a4cda714 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -72,6 +72,7 @@ namespace ErrorCodes extern const int BAD_TTL_FILE; extern const int NOT_IMPLEMENTED; extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int FILE_DOESNT_EXIST; } @@ -749,8 +750,16 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks /// Probably there is something wrong with files of this part. /// So it can be helpful to add to the error message some information about those files. String files_in_part; + for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next()) - files_in_part += fmt::format("{}{} ({} bytes)", (files_in_part.empty() ? "" : ", "), it->name(), getDataPartStorage().getFileSize(it->name())); + { + std::string file_info; + if (!getDataPartStorage().isDirectory(it->name())) + file_info = fmt::format(" ({} bytes)", getDataPartStorage().getFileSize(it->name())); + + files_in_part += fmt::format("{}{}{}", (files_in_part.empty() ? "" : ", "), it->name(), file_info); + + } if (!files_in_part.empty()) e->addMessage("Part contains files: {}", files_in_part); if (isEmpty()) @@ -2141,7 +2150,27 @@ void IMergeTreeDataPart::checkConsistencyBase() const } } - checksums.checkSizes(getDataPartStorage()); + const auto & data_part_storage = getDataPartStorage(); + for (const auto & [filename, checksum] : checksums.files) + { + try + { + checksum.checkSize(data_part_storage, filename); + } + catch (const Exception & ex) + { + /// For projection parts check will mark them broken in loadProjections + if (!parent_part && filename.ends_with(".proj")) + { + std::string projection_name = fs::path(filename).stem(); + LOG_INFO(storage.log, "Projection {} doesn't exist on start for part {}, marking it as broken", projection_name, name); + if (hasProjection(projection_name)) + markProjectionPartAsBroken(projection_name, ex.message(), ex.code()); + } + else + throw; + } + } } else { diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index b327480fa92..3ef36ce364c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -100,12 +100,6 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r } } -void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const -{ - for (const auto & [name, checksum] : files) - checksum.checkSize(storage, name); -} - UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const { UInt64 res = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 05178dc3a60..dc52f1ada2b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -65,9 +65,6 @@ struct MergeTreeDataPartChecksums static bool isBadChecksumsErrorCode(int code); - /// Checks that the directory contains all the needed files of the correct size. Does not check the checksum. - void checkSizes(const IDataPartStorage & storage) const; - /// Returns false if the checksum is too old. bool read(ReadBuffer & in); /// Assume that header with version (the first line) is read diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py index 162c0dbaa2f..578ff42369c 100644 --- a/tests/integration/test_broken_projections/test.py +++ b/tests/integration/test_broken_projections/test.py @@ -4,6 +4,7 @@ import logging import string import random from helpers.cluster import ClickHouseCluster +from multiprocessing.dummy import Pool cluster = ClickHouseCluster(__file__) @@ -18,6 +19,12 @@ def cluster(): stay_alive=True, with_zookeeper=True, ) + cluster.add_instance( + "node_restart", + main_configs=["config.d/dont_start_broken.xml"], + stay_alive=True, + with_zookeeper=True, + ) logging.info("Starting cluster...") cluster.start() @@ -632,6 +639,49 @@ def test_broken_on_start(cluster): check(node, table_name, 0) +def test_disappeared_projection_on_start(cluster): + node = cluster.instances["node_restart"] + + table_name = "test_disapperead_projection" + create_table(node, table_name, 1) + + node.query(f"SYSTEM STOP MERGES {table_name}") + + insert(node, table_name, 0, 5) + insert(node, table_name, 5, 5) + insert(node, table_name, 10, 5) + insert(node, table_name, 15, 5) + + assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts( + node, table_name + ) + + def drop_projection(): + node.query( + f"ALTER TABLE {table_name} DROP PROJECTION proj2", + settings={"mutations_sync": "0"}, + ) + + p = Pool(2) + p.apply_async(drop_projection) + + for i in range(30): + create_query = node.query(f"SHOW CREATE TABLE {table_name}") + if "proj2" not in create_query: + break + time.sleep(0.5) + + assert "proj2" not in create_query + + # Remove 'proj2' for part all_2_2_0 + break_projection(node, table_name, "proj2", "all_2_2_0", "part") + + node.restart_clickhouse() + + # proj2 is not broken, it doesn't exist, but ok + check(node, table_name, 0, expect_broken_part="proj2", do_check_command=0) + + def test_mutation_with_broken_projection(cluster): node = cluster.instances["node"] From 0dc4d773edd530494c1ab514d104a45665bdd16a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 7 Aug 2024 18:46:34 +0200 Subject: [PATCH 046/132] Fxi style --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 918a4cda714..93904c1a838 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -72,7 +72,6 @@ namespace ErrorCodes extern const int BAD_TTL_FILE; extern const int NOT_IMPLEMENTED; extern const int NO_SUCH_COLUMN_IN_TABLE; - extern const int FILE_DOESNT_EXIST; } From ec3a248e70e16d2cb4db1ec5ff17e95cc11dedae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 7 Aug 2024 17:37:54 +0000 Subject: [PATCH 047/132] Fix clang-tidy --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b24d7968b61..5ee0bd328e0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1146,7 +1146,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false); if (!filter_dag) return {}; From d81b5239debaf01b74521511db44d6cb4cd419c4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 7 Aug 2024 21:37:01 +0200 Subject: [PATCH 048/132] Remove unused CLI option --- src/Client/ClientApplicationBase.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 9f133616d2e..71d13ad4f53 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -200,8 +200,6 @@ void ClientApplicationBase::init(int argc, char ** argv) ("pager", po::value(), "Pipe all output into this command (less or similar)") ("max_memory_usage_in_client", po::value(), "Set memory limit in client/local server") - ("fuzzer-args", po::value(), "Command line arguments for the LLVM's libFuzzer driver. Only relevant if the application is compiled with libFuzzer.") - ("client_logs_file", po::value(), "Path to a file for writing client logs. Currently we only have fatal logs (when the client crashes)") ; From 1dece979fe317a04f98d2b8008619c47fb72edb1 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 7 Aug 2024 23:10:40 +0200 Subject: [PATCH 049/132] CI: pass job timout into tests --- tests/ci/ci.py | 1 + tests/ci/ci_definitions.py | 2 +- tests/ci/functional_test_check.py | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 8d0414ce7a8..49b597333dc 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -985,6 +985,7 @@ def _run_test(job_name: str, run_command: str) -> int: else: print("Use run command from the workflow") env["CHECK_NAME"] = job_name + env["MAX_RUN_TIME"] = str(timeout or 0) print(f"Going to start run command [{run_command}]") stopwatch = Stopwatch() job_log = Path(TEMP_PATH) / "job_log.txt" diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48847b0d7a6..592cb2f4879 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -420,7 +420,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=9000, + timeout=1000, # test ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 52970404d2d..3aff97643c3 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -114,6 +114,9 @@ def get_run_command( if flaky_check: envs.append("-e NUM_TRIES=50") envs.append("-e MAX_RUN_TIME=2800") + else: + max_run_time = os.getenv("MAX_RUN_TIME", 0) + envs.append(f"-e MAX_RUN_TIME={max_run_time}") envs += [f"-e {e}" for e in additional_envs] From 086b3d240dd696c483f136d79db4587a83bb0a14 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 7 Aug 2024 23:34:36 +0200 Subject: [PATCH 050/132] CI: push CI --- tests/ci/ci_definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 592cb2f4879..b62d2e0aa8e 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -420,7 +420,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=1000, # test + timeout=1001, # test ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", From 55ad7d30946d609159fe5ae9156f02f5b160585a Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 8 Aug 2024 00:08:12 +0200 Subject: [PATCH 051/132] Fix stylelint --- tests/ci/functional_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 3aff97643c3..b7391eff01b 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -115,7 +115,7 @@ def get_run_command( envs.append("-e NUM_TRIES=50") envs.append("-e MAX_RUN_TIME=2800") else: - max_run_time = os.getenv("MAX_RUN_TIME", 0) + max_run_time = os.getenv("MAX_RUN_TIME", "0") envs.append(f"-e MAX_RUN_TIME={max_run_time}") envs += [f"-e {e}" for e in additional_envs] From 8426e0d5e5d7f102fd57a45c82ae6acccda65369 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 7 Aug 2024 16:44:10 +0800 Subject: [PATCH 052/132] fix crash --- src/Interpreters/HashJoin/HashJoinMethodsImpl.h | 3 +-- src/Interpreters/HashJoin/JoinFeatures.h | 2 +- .../03006_join_on_inequal_expression_fast.reference | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h index 2bf5f6aef4a..5fefe53d145 100644 --- a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h +++ b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h @@ -361,7 +361,6 @@ size_t HashJoinMethods::joinRightColumns( } bool right_row_found = false; - KnownRowsHolder known_rows; for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) { @@ -693,7 +692,7 @@ size_t HashJoinMethods::joinRightColumnsWithAddti any_matched = true; if constexpr (join_features.is_semi_join || join_features.is_any_join) { - auto used_once = used_flags.template setUsedOnce( + auto used_once = used_flags.template setUsedOnce( selected_right_row_it->block, selected_right_row_it->row_num, 0); if (used_once) { diff --git a/src/Interpreters/HashJoin/JoinFeatures.h b/src/Interpreters/HashJoin/JoinFeatures.h index a530179f0b4..b8de606c51e 100644 --- a/src/Interpreters/HashJoin/JoinFeatures.h +++ b/src/Interpreters/HashJoin/JoinFeatures.h @@ -22,7 +22,7 @@ struct JoinFeatures static constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); static constexpr bool add_missing = (left || full) && !is_semi_join; - static constexpr bool need_flags = MapGetter, HashJoin::MapsOne>>::flagged; + static constexpr bool need_flags = MapGetter, HashJoin::MapsAll>>::flagged; static constexpr bool is_maps_all = std::is_same_v, HashJoin::MapsAll>; }; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference index 11ac01d24d5..aa8d4103db2 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -1,4 +1,3 @@ -03006_join_on_inequal_expression_fast.sql -- { echoOn } SET join_algorithm='hash'; SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); From 142e9528f0f1f7daa68f296181f1c0e5f7b6108b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 03:10:45 +0200 Subject: [PATCH 053/132] Add a test for #57420 --- .../03218_materialize_msan.reference | 1 + .../0_stateless/03218_materialize_msan.sql | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03218_materialize_msan.reference create mode 100644 tests/queries/0_stateless/03218_materialize_msan.sql diff --git a/tests/queries/0_stateless/03218_materialize_msan.reference b/tests/queries/0_stateless/03218_materialize_msan.reference new file mode 100644 index 00000000000..eb84f35f9f4 --- /dev/null +++ b/tests/queries/0_stateless/03218_materialize_msan.reference @@ -0,0 +1 @@ +[(NULL,'11\01111111\011111','1111')] -2147483648 \N diff --git a/tests/queries/0_stateless/03218_materialize_msan.sql b/tests/queries/0_stateless/03218_materialize_msan.sql new file mode 100644 index 00000000000..b41300ea1e3 --- /dev/null +++ b/tests/queries/0_stateless/03218_materialize_msan.sql @@ -0,0 +1,21 @@ +SELECT + materialize([(NULL, '11\01111111\011111', '1111')]) AS t, + (t[1048576]).2, + materialize(-2147483648), + (t[-2147483648]).1 +GROUP BY + materialize([(NULL, '1')]), + '', + (materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL, 2147483647, t[65535], 256)), materialize(NULL)) +; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT + materialize([(NULL, '11\01111111\011111', '1111')]) AS t, + (t[1048576]).2, + materialize(-2147483648), + (t[-2147483648]).1 +GROUP BY + materialize([(NULL, '1')]), + '', + (materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL), materialize(2147483647), materialize(t[65535]), materialize(256)), materialize(NULL)) +; From d6ecabb41dff64a1fb8ac5a77ffc1a8bed15162b Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 5 Aug 2024 00:40:23 +0000 Subject: [PATCH 054/132] Fix flaky test_storage_s3_queue/test.py::test_multiple_tables_streaming_sync_distributed Disable parallel processing for the Ordered mode for the test_storage_s3_queue/test.py::test_multiple_tables_streaming_sync_distributed test. The reason for this is that the load between the processing nodes is too uneven when s3queue_processing_threads_num != 1, e.g.: ``` $ grep res1 pytest.log 2024-08-07 07:15:58 [ 575 ] DEBUG : res1 size: 13300, res2 size: 1700, total_rows: 15000 (test.py:813, test_multiple_tables_streaming_sync_distributed) ``` In CIs environment, there are rare cases when one of the processors handles all the workload, while the other is busy-waiting, and the test fails on assert: When s3queue_processing_threads_num == 1, the workload is evenly distributed: ``` $ grep res1 pytest.log 2024-08-07 07:26:52 [ 586 ] DEBUG : res1 size: 7200, res2 size: 7800, total_rows: 15000 (test.py:813, test_multiple_tables_streaming_sync_distributed) ``` This change only fixes test flakiness. Further investigation of the Order mode parallelism is required. --- tests/integration/test_storage_s3_queue/test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 92d6f181464..8f197e09e61 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -771,7 +771,11 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): table_name, mode, files_path, - additional_settings={"keeper_path": keeper_path, "s3queue_buckets": 2}, + additional_settings={ + "keeper_path": keeper_path, + "s3queue_buckets": 2, + **({"s3queue_processing_threads_num": 1} if mode == "ordered" else {}), + }, ) for instance in [node, node_2]: @@ -806,6 +810,10 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): list(map(int, l.split())) for l in run_query(node_2, get_query).splitlines() ] + logging.debug( + f"res1 size: {len(res1)}, res2 size: {len(res2)}, total_rows: {total_rows}" + ) + assert len(res1) + len(res2) == total_rows # Checking that all engines have made progress From bd3674e6e982b66f571248a717628649b1482fe5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 7 Aug 2024 20:13:50 +0200 Subject: [PATCH 055/132] Add restart --- docker/test/stateless/run.sh | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index b33c261dacc..19f2cfca43f 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -54,8 +54,6 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless -./mc admin trace clickminio > /test_output/minio.log & -MC_ADMIN_PID=$! ./setup_hdfs_minicluster.sh @@ -176,7 +174,7 @@ done setup_logs_replication attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 -# create minio log webhooks for both audit and server logs +# create tables for minio log webhooks clickhouse-client --query "CREATE TABLE minio_audit_logs ( log String, @@ -184,7 +182,6 @@ clickhouse-client --query "CREATE TABLE minio_audit_logs ) ENGINE = MergeTree ORDER BY tuple()" -./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" clickhouse-client --query "CREATE TABLE minio_server_logs ( @@ -193,7 +190,36 @@ clickhouse-client --query "CREATE TABLE minio_server_logs ) ENGINE = MergeTree ORDER BY tuple()" + +# create minio log webhooks for both audit and server logs ./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" +max_retries=100 +retry=1 + +while [ $retry -le $max_retries ]; do + echo "clickminio restart attempt $retry:" + + output=$(mc admin service restart clickminio 2>&1) + echo "$output" + + if echo "$output" | grep -q "Restarted \`clickminio\` successfully in 1 seconds"; then + echo "Restarted clickminio successfully." + break + fi + + sleep 1 + + retry=$((retry + 1)) +done + +if [ $retry -gt $max_retries ]; then + echo "Failed to restart clickminio after $max_retries attempts." +fi + +./mc admin service restart clickminio +./mc admin trace clickminio > /test_output/minio.log & +MC_ADMIN_PID=$! function fn_exists() { declare -F "$1" > /dev/null; From 9d30b45dbeddec977e2b5ef1ca8286c35c012129 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Aug 2024 08:43:49 +0100 Subject: [PATCH 056/132] Fix --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 19f2cfca43f..4d86afa4bac 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -200,7 +200,7 @@ retry=1 while [ $retry -le $max_retries ]; do echo "clickminio restart attempt $retry:" - output=$(mc admin service restart clickminio 2>&1) + output=$(./mc admin service restart clickminio 2>&1) echo "$output" if echo "$output" | grep -q "Restarted \`clickminio\` successfully in 1 seconds"; then From f4aac7bbd9431e4d95eadfea31239b331ea18d77 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Aug 2024 09:19:45 +0100 Subject: [PATCH 057/132] Another fix --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 4d86afa4bac..830a02a64a3 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -203,7 +203,7 @@ while [ $retry -le $max_retries ]; do output=$(./mc admin service restart clickminio 2>&1) echo "$output" - if echo "$output" | grep -q "Restarted \`clickminio\` successfully in 1 seconds"; then + if echo "$output" | grep -q "Restarted \`clickminio\` successfully"; then echo "Restarted clickminio successfully." break fi From d0f35ce6a60e13b8aff9687a45e293ce89693241 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Aug 2024 10:29:01 +0200 Subject: [PATCH 058/132] Fix setting prefix --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 433a0e96d2e..7205b5b3294 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -305,7 +305,8 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet S3::ListObjectsV2Request request; request.SetBucket(uri.bucket); - request.SetPrefix(path); + if (path != "/") + request.SetPrefix(path); if (max_keys) request.SetMaxKeys(static_cast(max_keys)); else From 59b737c9ac045ca0ec48eb2b8893c5e54646003a Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 8 Aug 2024 10:50:14 +0200 Subject: [PATCH 059/132] CI: set correct timeout for stateless tests --- tests/ci/ci_definitions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index b62d2e0aa8e..48847b0d7a6 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -420,7 +420,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=1001, # test + timeout=9000, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", From 69ac203c9fce9972b89082ca653894fc8709f2fd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 8 Aug 2024 12:56:33 +0200 Subject: [PATCH 060/132] fix tests --- src/Interpreters/Context.cpp | 2 +- src/Interpreters/SystemLog.h | 2 +- tests/integration/test_system_flush_logs/test.py | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 3051ed3e567..4a08fd5fe5b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4256,7 +4256,7 @@ std::shared_ptr Context::getS3QueueLog() const if (!shared->system_logs) return {}; - return shared->system_logs->s3_queue_log; + return shared->system_logs->s3queue_log; } std::shared_ptr Context::getAzureQueueLog() const diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 6682829c0c6..24ef6a18eb8 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -18,7 +18,7 @@ M(ErrorLog, error_log, "Contains history of error values from table system.errors, periodically flushed to disk.") \ M(FilesystemCacheLog, filesystem_cache_log, "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem.") \ M(FilesystemReadPrefetchesLog, filesystem_read_prefetches_log, "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem.") \ - M(ObjectStorageQueueLog, s3_queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ + M(ObjectStorageQueueLog, s3queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ M(ObjectStorageQueueLog, azure_queue_log, "Contains logging entries with the information files processes by S3Queue engine.") \ M(AsynchronousMetricLog, asynchronous_metric_log, "Contains the historical values for system.asynchronous_metrics, once per time interval (one second by default).") \ M(OpenTelemetrySpanLog, opentelemetry_span_log, "Contains information about trace spans for executed queries.") \ diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index dd48ef055f5..cfecea5b3d6 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -24,8 +24,7 @@ def start_cluster(): def test_system_logs_exists(): system_logs = [ - # disabled by default - ("system.text_log", 0), + ("system.text_log", 1), ("system.query_log", 1), ("system.query_thread_log", 1), ("system.part_log", 1), From b0ba53788ac758ac1405ceefacd91bb0418b5834 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 7 Aug 2024 16:29:07 +0000 Subject: [PATCH 061/132] Refactor tests for (experimental) statistics --- docs/en/development/tests.md | 4 +- .../statements/alter/statistics.md | 16 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- ...2864_statistics_count_min_sketch.reference | 14 -- .../02864_statistics_count_min_sketch.sql | 70 ------ .../02864_statistics_ddl.reference | 37 +-- .../0_stateless/02864_statistics_ddl.sql | 234 ++++++++++++++---- ...delayed_materialization_in_merge.reference | 12 + ...stics_delayed_materialization_in_merge.sql | 36 +++ .../02864_statistics_exception.reference | 0 .../02864_statistics_exception.sql | 55 ---- ..._statistics_materialize_in_merge.reference | 10 - .../02864_statistics_materialize_in_merge.sql | 52 ---- .../02864_statistics_predicates.reference | 92 +++++++ .../02864_statistics_predicates.sql | 214 ++++++++++++++++ .../02864_statistics_uniq.reference | 35 --- .../0_stateless/02864_statistics_uniq.sql | 73 ------ .../02864_statistics_usage.reference | 20 ++ .../0_stateless/02864_statistics_usage.sql | 42 ++++ 19 files changed, 619 insertions(+), 399 deletions(-) delete mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.sql create mode 100644 tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference create mode 100644 tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_exception.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_exception.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql create mode 100644 tests/queries/0_stateless/02864_statistics_predicates.reference create mode 100644 tests/queries/0_stateless/02864_statistics_predicates.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_uniq.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_uniq.sql create mode 100644 tests/queries/0_stateless/02864_statistics_usage.reference create mode 100644 tests/queries/0_stateless/02864_statistics_usage.sql diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 269995a1a96..6cb36e2049b 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. :::note A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs @@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes ### Adding a New Test -To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 6880cef0e5c..7a1774a01b5 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -8,26 +8,28 @@ sidebar_label: STATISTICS The following operations are available: -- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata. +- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds statistic description to tables metadata. -- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata. +- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies statistic description to tables metadata. -- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. +- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. -- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. +- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. -- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). The first two commands are lightweight in a sense that they only change metadata or remove files. Also, they are replicated, syncing statistics metadata via ZooKeeper. -There is an example adding two statistics types to two columns: +## Example: + +Adding two statistics types to two columns: ``` ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` :::note -Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +Statistic are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 051d52a71cd..fe4857e9449 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3513,7 +3513,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context const auto & new_column = new_metadata.getColumns().get(command.column_name); if (!old_column.type->equals(*new_column.type)) throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, - "ALTER types of column {} with statistics is not not safe " + "ALTER types of column {} with statistics is not safe " "because it can change the representation of statistics", backQuoteIfNeed(command.column_name)); } diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference deleted file mode 100644 index 02c41656a36..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics count_min: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) -Test statistics multi-types: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test LowCardinality and Nullable data type: -tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql deleted file mode 100644 index c730aa7b4a7..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql +++ /dev/null @@ -1,70 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS tab SYNC; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET allow_suspicious_low_cardinality_types=1; -SET mutations_sync = 2; - -CREATE TABLE tab -( - a String, - b UInt64, - c Int64, - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE tab; - -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'Test statistics count_min:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min; -ALTER TABLE tab ADD STATISTICS c TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - - -SELECT 'Test statistics multi-types:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - -DROP TABLE IF EXISTS tab SYNC; - - -SELECT 'Test LowCardinality and Nullable data type:'; -DROP TABLE IF EXISTS tab2 SYNC; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE tab2 -( - a LowCardinality(Int64) STATISTICS(count_min), - b Nullable(Int64) STATISTICS(count_min), - c LowCardinality(Nullable(Int64)) STATISTICS(count_min), - pk String, -) Engine = MergeTree() ORDER BY pk; - -select name from system.tables where name = 'tab2' and database = currentDatabase(); - -DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference index a7ff5caa0b0..0e453b0ee8a 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.reference +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -1,31 +1,6 @@ -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index fe612efe2ac..32b56a842b7 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -1,59 +1,195 @@ --- Tests that various DDL statements create/drop/materialize statistics +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests that DDL statements which create / drop / materialize statistics + +SET mutations_sync = 1; DROP TABLE IF EXISTS tab; +-- Error case: Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; + +-- Error case: Unknown statistics types are rejected +CREATE TABLE tab (col Float64 STATISTICS(no_statistics_type)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +-- Error case: The same statistics type can't exist more than once on a column +CREATE TABLE tab (col Float64 STATISTICS(tdigest, tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +SET allow_suspicious_low_cardinality_types = 1; + +-- Statistics can only be created on columns of specific data types (depending on the statistics kind), (*) + +-- tdigest requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- uniq requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- CREATE TABLE was easy, ALTER is more fun CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f32 Float32, + s String, + a Array(Float64) +) +Engine = MergeTree() +ORDER BY tuple(); +-- Error case: Unknown statistics types are rejected +-- (relevant for ADD and MODIFY) +ALTER TABLE tab ADD STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +-- for some reason, ALTER TABLE tab MODIFY STATISTICS IF EXISTS is not supported + +-- Error case: The same statistics type can't exist more than once on a column +-- (relevant for ADD and MODIFY) +-- Create the same statistics object twice +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +-- Create an statistics which exists already +ALTER TABLE tab ADD STATISTICS f64_tdigest TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64_tdigest TYPE tdigest; -- no-op +ALTER TABLE tab MODIFY STATISTICS f64_tdigest TYPE tdigest; -- no-op + +-- Error case: Column does not exist +-- (relevant for ADD, MODIFY, DROP, CLEAR, and MATERIALIZE) +-- Note that the results are unfortunately quite inconsistent ... +ALTER TABLE tab ADD STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS IF EXISTS no_such_column; -- no-op +ALTER TABLE tab CLEAR STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS no_such_column; -- no-op +ALTER TABLE tab MATERIALIZE STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS no_such_column; -- { serverError ILLEGAL_STATISTICS } + +-- Error case: Column exists but has no statistics +-- (relevant for MODIFY, DROP, CLEAR, and MATERIALIZE) +-- Note that the results are unfortunately quite inconsistent ... +ALTER TABLE tab MODIFY STATISTICS s TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS IF EXISTS s; -- no-op +ALTER TABLE tab CLEAR STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS s; -- no-op +ALTER TABLE tab MATERIALIZE STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS s; -- { serverError ILLEGAL_STATISTICS } + +-- We don't check systematically that that statistics can only be created via ALTER ADD STATISTICS on columns of specific data types (the +-- internal type validation code is tested already above, (*)). Only do a rudimentary check for each statistics type with a data type that +-- works and one that doesn't work. +-- tdigest +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +-- uniq +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } +-- count_min +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } + +-- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing +-- statistics objects (e.g. tdigest can be created on Float64 and UInt64) +ALTER TABLE tab MODIFY COLUMN f64_tdigest UInt64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } + +-- Finally, do a full-circle test of a good case. Print table definition after each step. +-- Intentionally specifying _two_ columns and _two_ statistics types to have that also tested. +SHOW CREATE TABLE tab; +ALTER TABLE tab ADD STATISTICS f64, f32 TYPE tdigest, uniq; +SHOW CREATE TABLE tab; +ALTER TABLE tab MODIFY STATISTICS f64, f32 TYPE tdigest, uniq; +SHOW CREATE TABLE tab; +ALTER TABLE tab CLEAR STATISTICS f64, f32; +SHOW CREATE TABLE tab; +ALTER TABLE tab MATERIALIZE STATISTICS f64, f32; +SHOW CREATE TABLE tab; +ALTER TABLE tab DROP STATISTICS f64, f32; SHOW CREATE TABLE tab; -INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; -SELECT count(*) FROM tab WHERE b < NULL and a < '10'; - -ALTER TABLE tab DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -SHOW CREATE TABLE tab; - -ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE tab; - -ALTER TABLE tab MATERIALIZE STATISTICS a, b; -INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -OPTIMIZE TABLE tab FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -ALTER TABLE tab RENAME COLUMN b TO c; -SHOW CREATE TABLE tab; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference new file mode 100644 index 00000000000..eb5e685597c --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference @@ -0,0 +1,12 @@ +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After truncate, insert, and materialize + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql new file mode 100644 index 00000000000..33a5f9052ba --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql @@ -0,0 +1,36 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). +-- (The concrete statistics type, column data type and predicate type don't matter) + +-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET allow_analyzer = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +TRUNCATE TABLE tab; +SET mutations_sync = 2; +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +SELECT 'After truncate, insert, and materialize'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.reference b/tests/queries/0_stateless/02864_statistics_exception.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql deleted file mode 100644 index 289ffee6600..00000000000 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ /dev/null @@ -1,55 +0,0 @@ --- Tests creating/dropping/materializing statistics produces the right exceptions. - -DROP TABLE IF EXISTS tab; - --- Can't create statistics when allow_experimental_statistics = 0 -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - -SET allow_experimental_statistics = 1; - --- The same type of statistics can't exist more than once on a column -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest, tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- Unknown statistics types are rejected -CREATE TABLE tab -( - a Float64 STATISTICS(no_statistics_type) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- tDigest statistics can only be created on numeric columns -CREATE TABLE tab -( - a String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - -CREATE TABLE tab -( - a Float64, - b String -) Engine = MergeTree() ORDER BY tuple(); - -ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; --- Statistics can be created only on integer columns -ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS a; -ALTER TABLE tab DROP STATISTICS IF EXISTS a; -ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; -ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } - -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference deleted file mode 100644 index 5e969cf41cb..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference +++ /dev/null @@ -1,10 +0,0 @@ -10 -10 -10 -statistics not used Condition less(b, 10_UInt8) moved to PREWHERE -statistics not used Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE -2 0 diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql deleted file mode 100644 index 6606cff263f..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql +++ /dev/null @@ -1,52 +0,0 @@ --- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). - -DROP TABLE IF EXISTS tab; - -SET enable_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -SET materialize_statistics_on_insert = 0; - -CREATE TABLE tab -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE tab FINAL; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE tab; -SET mutations_sync = 2; - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE tab MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE tab; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM tab%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO tab SELECT%' - AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference new file mode 100644 index 00000000000..1c2abd47aaf --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.reference @@ -0,0 +1,92 @@ +u64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +u64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +f64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +0 +0 +0 +f64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +80 +80 +80 +dt and = +0 +0 +0 +0 +10 +10 +10 +10 +dt and < +10000 +10000 +10000 +70 +70 +70 +70 +b and = +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +0 +0 +0 +0 +s and = +10 +10 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql new file mode 100644 index 00000000000..3e754dfb1de --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -0,0 +1,214 @@ +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types. + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + u64 UInt64, + u64_tdigest UInt64 STATISTICS(tdigest), + u64_count_min UInt64 STATISTICS(count_min), + u64_uniq UInt64 STATISTICS(uniq), + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f64_count_min Float64 STATISTICS(count_min), + f64_uniq Float64 STATISTICS(uniq), + dt DateTime, + dt_tdigest DateTime STATISTICS(tdigest), + dt_count_min DateTime STATISTICS(count_min), + dt_uniq DateTime STATISTICS(uniq), + b Bool, + b_tdigest Bool STATISTICS(tdigest), + b_count_min Bool STATISTICS(count_min), + b_uniq Bool STATISTICS(uniq), + s String, + -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest + s_count_min String STATISTICS(count_min) + -- s_uniq String STATISTICS(uniq), -- not supported by uniq +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab +-- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000; +SELECT number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 2, + number % 2, + number % 2, + number % 2, + toString(number % 1000), + toString(number % 1000) +FROM system.numbers LIMIT 10000; + +-- u64 ---------------------------------------------------- + +SELECT 'u64 and ='; + +SELECT count(*) FROM tab WHERE u64 = 7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7; +SELECT count(*) FROM tab WHERE u64_count_min = 7; +SELECT count(*) FROM tab WHERE u64_uniq = 7; + +SELECT count(*) FROM tab WHERE u64 = 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE u64_count_min = 7.7; +SELECT count(*) FROM tab WHERE u64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE u64 = '7'; +SELECT count(*) FROM tab WHERE u64_tdigest = '7'; +SELECT count(*) FROM tab WHERE u64_count_min = '7'; +SELECT count(*) FROM tab WHERE u64_uniq = '7'; + +SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } + +SELECT 'u64 and <'; + +SELECT count(*) FROM tab WHERE u64 < 7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7; +SELECT count(*) FROM tab WHERE u64_count_min < 7; +SELECT count(*) FROM tab WHERE u64_uniq < 7; + +SELECT count(*) FROM tab WHERE u64 < 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE u64_count_min < 7.7; +SELECT count(*) FROM tab WHERE u64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE u64 < '7'; +-- SELECT count(*) FROM tab WHERE u64_tdigest < '7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE u64_count_min < '7'; +SELECT count(*) FROM tab WHERE u64_uniq < '7'; + +SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } + +-- f64 ---------------------------------------------------- + +SELECT 'f64 and ='; + +SELECT count(*) FROM tab WHERE f64 = 7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7; +SELECT count(*) FROM tab WHERE f64_count_min = 7; +SELECT count(*) FROM tab WHERE f64_uniq = 7; + +SELECT count(*) FROM tab WHERE f64 = 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE f64_count_min = 7.7; +SELECT count(*) FROM tab WHERE f64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE f64 = '7'; +-- SELECT count(*) FROM tab WHERE f64_tdigest = '7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_count_min = '7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7'; + +SELECT count(*) FROM tab WHERE f64 = '7.7'; +-- SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; + +SELECT 'f64 and <'; + +SELECT count(*) FROM tab WHERE f64 < 7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7; +SELECT count(*) FROM tab WHERE f64_count_min < 7; +SELECT count(*) FROM tab WHERE f64_uniq < 7; + +SELECT count(*) FROM tab WHERE f64 < 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE f64_count_min < 7.7; +SELECT count(*) FROM tab WHERE f64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE f64 < '7'; +-- SELECT count(*) FROM tab WHERE f64_tdigest < '7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_count_min < '7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7'; + +SELECT count(*) FROM tab WHERE f64 < '7.7'; +-- SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; + +-- dt ---------------------------------------------------- + +SELECT 'dt and ='; + +SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt = 7; +SELECT count(*) FROM tab WHERE dt_tdigest = 7; +SELECT count(*) FROM tab WHERE dt_count_min = 7; +SELECT count(*) FROM tab WHERE dt_uniq = 7; + +SELECT 'dt and <'; + +SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; +-- SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt < 7; +SELECT count(*) FROM tab WHERE dt_tdigest < 7; +SELECT count(*) FROM tab WHERE dt_count_min < 7; +SELECT count(*) FROM tab WHERE dt_uniq < 7; + +-- b ---------------------------------------------------- + +SELECT 'b and ='; + +SELECT count(*) FROM tab WHERE b = true; +SELECT count(*) FROM tab WHERE b_tdigest = true; +SELECT count(*) FROM tab WHERE b_count_min = true; +SELECT count(*) FROM tab WHERE b_uniq = true; + +SELECT count(*) FROM tab WHERE b = 'true'; +SELECT count(*) FROM tab WHERE b_tdigest = 'true'; +SELECT count(*) FROM tab WHERE b_count_min = 'true'; +SELECT count(*) FROM tab WHERE b_uniq = 'true'; + +SELECT count(*) FROM tab WHERE b = 1; +SELECT count(*) FROM tab WHERE b_tdigest = 1; +SELECT count(*) FROM tab WHERE b_count_min = 1; +SELECT count(*) FROM tab WHERE b_uniq = 1; + +SELECT count(*) FROM tab WHERE b = 1.1; +SELECT count(*) FROM tab WHERE b_tdigest = 1.1; +SELECT count(*) FROM tab WHERE b_count_min = 1.1; +SELECT count(*) FROM tab WHERE b_uniq = 1.1; + +-- s ---------------------------------------------------- + +SELECT 's and ='; + +SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported + +SELECT count(*) FROM tab WHERE s = '7'; +-- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = '7'; +-- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.reference b/tests/queries/0_stateless/02864_statistics_uniq.reference deleted file mode 100644 index 77786dbdd8c..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.reference +++ /dev/null @@ -1,35 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After modify TDigest - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed) -After drop - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql deleted file mode 100644 index 0f5f353c045..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ /dev/null @@ -1,73 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET mutations_sync = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Int64 STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; -INSERT INTO t1 select 0, 0, 11, generateUUIDv4(); - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT 'After modify TDigest'; -ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest; -ALTER TABLE t1 MATERIALIZE STATISTICS c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - - -ALTER TABLE t1 DROP STATISTICS c; - -SELECT 'After drop'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE t2 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c LowCardinality(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t2; -DROP TABLE IF EXISTS t3; - -CREATE TABLE t3 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Nullable(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t3; - diff --git a/tests/queries/0_stateless/02864_statistics_usage.reference b/tests/queries/0_stateless/02864_statistics_usage.reference new file mode 100644 index 00000000000..a9f669b88c1 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.reference @@ -0,0 +1,20 @@ +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) +After add and materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_usage.sql b/tests/queries/0_stateless/02864_statistics_usage.sql new file mode 100644 index 00000000000..f936854df44 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.sql @@ -0,0 +1,42 @@ +-- Test that the optimizer picks up column statistics +-- (The concrete statistics type, column data type and predicate type don't matter) + +-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET mutations_sync = 1; +SET allow_analyzer = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +ALTER TABLE tab DROP STATISTICS a, b; +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; +SELECT 'After add and materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +ALTER TABLE tab RENAME COLUMN b TO c; +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then c (statistics used) + +DROP TABLE IF EXISTS tab; From 55b2000d38e0bc6282714fdb1204d450437433ec Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 8 Aug 2024 13:58:43 +0000 Subject: [PATCH 062/132] Fix fasttest --- .../02864_statistics_delayed_materialization_in_merge.sql | 2 +- tests/queries/0_stateless/02864_statistics_usage.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql index 33a5f9052ba..d469a4c2036 100644 --- a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS tab; SET allow_experimental_statistics = 1; SET allow_statistics_optimize = 1; -SET allow_analyzer = 1; +SET enable_analyzer = 1; SET materialize_statistics_on_insert = 0; diff --git a/tests/queries/0_stateless/02864_statistics_usage.sql b/tests/queries/0_stateless/02864_statistics_usage.sql index f936854df44..4956bd27e87 100644 --- a/tests/queries/0_stateless/02864_statistics_usage.sql +++ b/tests/queries/0_stateless/02864_statistics_usage.sql @@ -6,7 +6,7 @@ SET allow_experimental_statistics = 1; SET allow_statistics_optimize = 1; SET mutations_sync = 1; -SET allow_analyzer = 1; +SET enable_analyzer = 1; DROP TABLE IF EXISTS tab; From 34bbf3d9c49f0f563eee8973ac3c3bae0e64c6de Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 8 Aug 2024 17:12:27 +0200 Subject: [PATCH 063/132] Use async inserts --- docker/test/stateless/run.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 830a02a64a3..c81f33ace01 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -192,8 +192,9 @@ ENGINE = MergeTree ORDER BY tuple()" # create minio log webhooks for both audit and server logs -./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" -./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" +# use async inserts to avoid creating too many parts +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" max_retries=100 retry=1 @@ -375,6 +376,12 @@ done # collect minio audit and server logs +has_async_inserts=$(clickhouse-client -q "SELECT count() FROM system.asynchronous_inserts WHERE table = 'minio_audit_logs' OR table = 'minio_server_logs'") +if [[ has_async_inserts -eq 1 ]]; then + echo "Waiting for async inserts to flush" + sleep 5 +fi + clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" From 33ba78ee42bc85690dce69c82fd51d723a6d2eab Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 8 Aug 2024 17:47:12 +0200 Subject: [PATCH 064/132] Update test.py --- tests/integration/test_drop_is_lock_free/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 1bb8767a9a0..3855bc21f90 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -176,7 +176,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0, max_threads=1; """, query_id=query_id, ) From f9f13a8e415e9f2130281d069c28dd6e9a68be75 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 8 Aug 2024 16:27:25 +0000 Subject: [PATCH 065/132] enable setting optimize_functions_to_subcolumns by default --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f04c696645a..f0a8d0c2647 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -605,7 +605,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ - M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 41e4ac2e154..b00d0964e01 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -83,6 +83,7 @@ static std::initializer_list Date: Thu, 8 Aug 2024 11:20:55 -0600 Subject: [PATCH 066/132] fixed typos --- docs/en/sql-reference/functions/geo/polygon.md | 2 +- src/Functions/geometryConverters.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index c054e05d39c..be9e9810626 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -197,7 +197,7 @@ The function returns a ClickHouse internal representation of the multilinestring ### Example ```sql -SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))')); +SELECT readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3), (4 4, 5 5, 6 6))'); ``` ```response diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index bf975017a6d..f1156d81f01 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -471,7 +471,7 @@ static void callOnGeometryDataType(DataTypePtr type, F && f) return f(ConverterType>()); /// We should take the name into consideration to avoid ambiguity. - /// Because for example both MultiLineString and Polygon are resolved to Array(Tuple(Point)). + /// Because for example both MultiLineString and Polygon are resolved to Array(Array(Point)). else if (factory.get("MultiLineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "MultiLineString") return f(ConverterType>()); From 2bd7606280b0714febc1d034233ee0fb9a3532a6 Mon Sep 17 00:00:00 2001 From: Jacob Reckhard Date: Thu, 8 Aug 2024 11:39:06 -0600 Subject: [PATCH 067/132] added additional tests --- .../03215_multilinestring_geometry.reference | 10 ++++++++++ .../0_stateless/03215_multilinestring_geometry.sql | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.reference b/tests/queries/0_stateless/03215_multilinestring_geometry.reference index f4c5774018e..f20e21d86f6 100644 --- a/tests/queries/0_stateless/03215_multilinestring_geometry.reference +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.reference @@ -15,3 +15,13 @@ MULTILINESTRING((1 1,2 2,3 3,1 1),(1 0,2 0,3 0)) WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); POLYGON((1 1,2 2,3 3,1 1)) String [[(1,1),(2,2),(3,3),(1,1)]] Polygon +-- Non constant tests + +CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String) Engine = Memory; +INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64))) +select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t; +POLYGON((1 1,2 2,3 3,1 1)) [[(1,1),(2,2),(3,3),(1,1)]] 1 +POLYGON((1 1,2 2,3 3,1 1),(1 0,2 0,3 0,1 0)) [[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] 1 diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.sql b/tests/queries/0_stateless/03215_multilinestring_geometry.sql index 71344920c52..6081e081fb3 100644 --- a/tests/queries/0_stateless/03215_multilinestring_geometry.sql +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.sql @@ -10,3 +10,15 @@ SELECT wkt(readWKTMultiLineString('MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, -- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. WITH wkt(CAST([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'Array(Array(Tuple(Float64, Float64)))')) as x SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); + +-- Non constant tests + +CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String) Engine = Memory; +INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); + +-- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. +-- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64))) +select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t; + + From b0b150c599052d596b8c746a31ba50019601b6a0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 8 Aug 2024 18:29:20 +0000 Subject: [PATCH 068/132] Remove workarounds for solved bugs --- .../02864_statistics_predicates.reference | 6 ++++++ .../0_stateless/02864_statistics_predicates.sql | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference index 1c2abd47aaf..ffbd7269e05 100644 --- a/tests/queries/0_stateless/02864_statistics_predicates.reference +++ b/tests/queries/0_stateless/02864_statistics_predicates.reference @@ -23,6 +23,7 @@ u64 and < 70 70 70 +70 f64 and = 10 10 @@ -35,6 +36,8 @@ f64 and = 10 10 10 +10 +0 0 0 0 @@ -50,6 +53,8 @@ f64 and < 70 70 70 +70 +80 80 80 80 @@ -66,6 +71,7 @@ dt and < 10000 10000 10000 +10000 70 70 70 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql index 3e754dfb1de..779116cf19a 100644 --- a/tests/queries/0_stateless/02864_statistics_predicates.sql +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -92,7 +92,7 @@ SELECT count(*) FROM tab WHERE u64_count_min < 7.7; SELECT count(*) FROM tab WHERE u64_uniq < 7.7; SELECT count(*) FROM tab WHERE u64 < '7'; --- SELECT count(*) FROM tab WHERE u64_tdigest < '7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE u64_tdigest < '7'; SELECT count(*) FROM tab WHERE u64_count_min < '7'; SELECT count(*) FROM tab WHERE u64_uniq < '7'; @@ -116,12 +116,12 @@ SELECT count(*) FROM tab WHERE f64_count_min = 7.7; SELECT count(*) FROM tab WHERE f64_uniq = 7.7; SELECT count(*) FROM tab WHERE f64 = '7'; --- SELECT count(*) FROM tab WHERE f64_tdigest = '7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_tdigest = '7'; SELECT count(*) FROM tab WHERE f64_count_min = '7'; SELECT count(*) FROM tab WHERE f64_uniq = '7'; SELECT count(*) FROM tab WHERE f64 = '7.7'; --- SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; @@ -138,12 +138,12 @@ SELECT count(*) FROM tab WHERE f64_count_min < 7.7; SELECT count(*) FROM tab WHERE f64_uniq < 7.7; SELECT count(*) FROM tab WHERE f64 < '7'; --- SELECT count(*) FROM tab WHERE f64_tdigest < '7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_tdigest < '7'; SELECT count(*) FROM tab WHERE f64_count_min < '7'; SELECT count(*) FROM tab WHERE f64_uniq < '7'; SELECT count(*) FROM tab WHERE f64 < '7.7'; --- SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; @@ -164,7 +164,7 @@ SELECT count(*) FROM tab WHERE dt_uniq = 7; SELECT 'dt and <'; SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; --- SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; -- BOOM (#67742) +SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; From fe234bd88f3eb1dca8cdb8b217606abfbcea1d54 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 20:56:51 +0200 Subject: [PATCH 069/132] Fix test --- tests/queries/0_stateless/03218_materialize_msan.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03218_materialize_msan.sql b/tests/queries/0_stateless/03218_materialize_msan.sql index b41300ea1e3..7e7043e687b 100644 --- a/tests/queries/0_stateless/03218_materialize_msan.sql +++ b/tests/queries/0_stateless/03218_materialize_msan.sql @@ -1,3 +1,5 @@ +SET enable_analyzer = 1; + SELECT materialize([(NULL, '11\01111111\011111', '1111')]) AS t, (t[1048576]).2, From bc20b637eae8bba72ecfab5256c7eace40586976 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 8 Aug 2024 21:47:23 +0200 Subject: [PATCH 070/132] Add missing file --- .../test_broken_projections/config.d/dont_start_broken.xml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tests/integration/test_broken_projections/config.d/dont_start_broken.xml diff --git a/tests/integration/test_broken_projections/config.d/dont_start_broken.xml b/tests/integration/test_broken_projections/config.d/dont_start_broken.xml new file mode 100644 index 00000000000..9603cdc7e3e --- /dev/null +++ b/tests/integration/test_broken_projections/config.d/dont_start_broken.xml @@ -0,0 +1,6 @@ + + + + 0 + + From 6f346c161463330c31d0bce2566df6875cf1d614 Mon Sep 17 00:00:00 2001 From: Jacob Reckhard Date: Thu, 8 Aug 2024 15:20:07 -0600 Subject: [PATCH 071/132] added order for repeatablility of test --- .../03215_multilinestring_geometry.reference | 11 +++++++---- .../0_stateless/03215_multilinestring_geometry.sql | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.reference b/tests/queries/0_stateless/03215_multilinestring_geometry.reference index f20e21d86f6..9702dd6d6f8 100644 --- a/tests/queries/0_stateless/03215_multilinestring_geometry.reference +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.reference @@ -17,11 +17,14 @@ SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); POLYGON((1 1,2 2,3 3,1 1)) String [[(1,1),(2,2),(3,3),(1,1)]] Polygon -- Non constant tests -CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String) Engine = Memory; -INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); -INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +DROP TABLE IF EXISTS t; +CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory; +INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))'); -- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. -- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64))) -select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t; +select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord; POLYGON((1 1,2 2,3 3,1 1)) [[(1,1),(2,2),(3,3),(1,1)]] 1 POLYGON((1 1,2 2,3 3,1 1),(1 0,2 0,3 0,1 0)) [[(1,1),(2,2),(3,3),(1,1)],[(1,0),(2,0),(3,0)]] 1 +POLYGON((1 0,2 1,3 0,4 1,5 0,6 1,7 0,8 1,9 0,10 1,1 0)) [[(1,0),(2,1),(3,0),(4,1),(5,0),(6,1),(7,0),(8,1),(9,0),(10,1)]] 1 diff --git a/tests/queries/0_stateless/03215_multilinestring_geometry.sql b/tests/queries/0_stateless/03215_multilinestring_geometry.sql index 6081e081fb3..cf4ef15f63d 100644 --- a/tests/queries/0_stateless/03215_multilinestring_geometry.sql +++ b/tests/queries/0_stateless/03215_multilinestring_geometry.sql @@ -13,12 +13,14 @@ SELECT x, toTypeName(x), readWKTPolygon(x) as y, toTypeName(y); -- Non constant tests -CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String) Engine = Memory; -INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); -INSERT INTO t (shape, wkt_string) VALUES ([[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +DROP TABLE IF EXISTS t; +CREATE TABLE IF NOT EXISTS t (shape Array(Array(Tuple(Float64, Float64))), wkt_string String, ord Float64) Engine = Memory; +INSERT INTO t (ord, shape, wkt_string) VALUES (1, [[(1, 1), (2, 2), (3, 3), (1, 1)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (2, [[(1, 1), (2, 2), (3, 3), (1, 1)], [(1, 0), (2, 0), (3, 0)]], 'MULTILINESTRING ((1 1, 2 2, 3 3, 1 1), (1 0, 2 0, 3 0))'); +INSERT INTO t (ord, shape, wkt_string) VALUES (3, [[(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1), (7, 0), (8, 1), (9, 0), (10, 1)]], 'MULTILINESTRING ((1 0, 2 1, 3 0, 4 1, 5 0, 6 1, 7 0, 8 1, 9 0, 10 1))'); -- Native Array(Array(Tuple(Float64, Float64))) is treated as Polygon, not as MultiLineString. -- but reading MultiLineString should still return an Array(Array(Tuple(Float64, Float64))) -select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t; +select wkt(shape), readWKTMultiLineString(wkt_string), readWKTMultiLineString(wkt_string) = shape from t order by ord; From 248da0341aca537104486243b84ca230e9c4f9fb Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 8 Aug 2024 15:54:56 +0800 Subject: [PATCH 072/132] fixed --- .../HashJoin/HashJoinMethodsImpl.h | 53 ++++++++++++++----- src/Interpreters/joinDispatch.h | 8 +-- ..._join_on_inequal_expression_fast.reference | 1 - 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h index 5fefe53d145..aedd24630d1 100644 --- a/src/Interpreters/HashJoin/HashJoinMethodsImpl.h +++ b/src/Interpreters/HashJoin/HashJoinMethodsImpl.h @@ -1,11 +1,12 @@ #pragma once #include + namespace DB { namespace ErrorCodes { - extern const int UNSUPPORTED_JOIN_KEYS; - extern const int LOGICAL_ERROR; +extern const int UNSUPPORTED_JOIN_KEYS; +extern const int LOGICAL_ERROR; } template size_t HashJoinMethods::insertFromBlockImpl( @@ -156,7 +157,6 @@ Block HashJoinMethods::joinBlockImpl( block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); } } - return remaining_block; } @@ -596,7 +596,7 @@ ColumnPtr HashJoinMethods::buildAdditionalFilter template template -size_t HashJoinMethods::joinRightColumnsWithAddtitionalFilter( +size_t HashJoinMethods::joinRightColumnsWithAddtitionalFilter( std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, @@ -662,6 +662,8 @@ size_t HashJoinMethods::joinRightColumnsWithAddti { auto & mapped = find_result.getMapped(); find_results.push_back(find_result); + /// We don't add missing in addFoundRowAll here. we will add it after filter is applied. + /// it's different from `joinRightColumns`. if (flag_per_row) addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); else @@ -682,32 +684,54 @@ size_t HashJoinMethods::joinRightColumnsWithAddti for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) { bool any_matched = false; - /// For right join, flag_per_row is true, we need mark used flags for each row. + /// right/full join or multiple disjuncts, we need to mark used flags for each row. if (flag_per_row) { for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) { if (filter_flags[replicated_row]) { - any_matched = true; if constexpr (join_features.is_semi_join || join_features.is_any_join) { - auto used_once = used_flags.template setUsedOnce( - selected_right_row_it->block, selected_right_row_it->row_num, 0); - if (used_once) + /// For LEFT/INNER SEMI/ANY JOIN, we need to add only first appeared row from left, + if constexpr (join_features.left || join_features.inner) { - total_added_rows += 1; - added_columns.appendFromBlock( - *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + if (!any_matched) + { + // For inner join, we need mark each right row'flag, because we only use each right row once. + auto used_once = used_flags.template setUsedOnce( + selected_right_row_it->block, selected_right_row_it->row_num, 0); + if (used_once) + { + any_matched = true; + total_added_rows += 1; + added_columns.appendFromBlock( + *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + } + } + } + else + { + auto used_once = used_flags.template setUsedOnce( + selected_right_row_it->block, selected_right_row_it->row_num, 0); + if (used_once) + { + any_matched = true; + total_added_rows += 1; + added_columns.appendFromBlock( + *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); + } } } else if constexpr (join_features.is_anti_join) { + any_matched = true; if constexpr (join_features.right && join_features.need_flags) used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); } else { + any_matched = true; total_added_rows += 1; added_columns.appendFromBlock( *selected_right_row_it->block, selected_right_row_it->row_num, join_features.add_missing); @@ -715,6 +739,7 @@ size_t HashJoinMethods::joinRightColumnsWithAddti selected_right_row_it->block, selected_right_row_it->row_num, 0); } } + ++selected_right_row_it; } } @@ -892,7 +917,8 @@ void HashJoinMethods::correctNullabilityInplace( } template -void HashJoinMethods::correctNullabilityInplace(ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) +void HashJoinMethods::correctNullabilityInplace( + ColumnWithTypeAndName & column, bool nullable, const IColumn::Filter & negative_null_map) { if (nullable) { @@ -908,4 +934,3 @@ void HashJoinMethods::correctNullabilityInplace( JoinCommon::removeColumnNullability(column); } } - diff --git a/src/Interpreters/joinDispatch.h b/src/Interpreters/joinDispatch.h index 5d4bd8f92e5..4aabc49c29b 100644 --- a/src/Interpreters/joinDispatch.h +++ b/src/Interpreters/joinDispatch.h @@ -16,11 +16,13 @@ namespace DB /// When only need to match only one row from right table, use HashJoin::MapsOne. For example, LEFT ANY/SEMI/ANTI. /// /// HashJoin::MapsAll will store all rows for each key in the map. It is used when need to match multiple rows from right table. -/// For example, RIGHT ANY/ALL, FULL JOIN, INNER JOIN. +/// For example, LEFT ALL, INNER ALL, RIGHT ALL/ANY. /// /// prefer_use_maps_all is true when there is mixed inequal condition in the join condition. For example, `t1.a = t2.a AND t1.b > t2.b`. /// In this case, we need to use HashJoin::MapsAll to store all rows for each key in the map. We will select all matched rows from the map /// and filter them by `t1.b > t2.b`. +/// +/// flagged indicates whether we need to store flags for each row whether it has been used in the join. See JoinUsedFlags.h. template struct MapGetter; @@ -30,7 +32,7 @@ template struct MapGetter struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template <> struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; @@ -43,7 +45,7 @@ template struct MapGetter struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; -template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; +template <> struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = false; }; template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; template struct MapGetter { using Map = HashJoin::MapsAll; static constexpr bool flagged = true; }; template struct MapGetter { using Map = HashJoin::MapsOne; static constexpr bool flagged = false; }; diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference index aa8d4103db2..a70e70ef7e9 100644 --- a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -742,4 +742,3 @@ key1 b 2 3 2 key1 c 3 2 1 key1 d 4 7 2 key2 a2 1 1 1 -key4 f 2 3 4 From 54cd980e13d70e84e2c0aad51420a2dd31217131 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Aug 2024 09:44:30 +0200 Subject: [PATCH 073/132] Increase queue size and fix tests --- docker/test/stateless/run.sh | 11 +++-------- .../0_stateless/02726_async_insert_flush_queue.sql | 4 +++- .../0_stateless/02726_async_insert_flush_stress.sh | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c81f33ace01..cd2a61cce4e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -193,8 +193,8 @@ ORDER BY tuple()" # create minio log webhooks for both audit and server logs # use async inserts to avoid creating too many parts -./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" -./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=300000 +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=300000 max_retries=100 retry=1 @@ -376,12 +376,7 @@ done # collect minio audit and server logs -has_async_inserts=$(clickhouse-client -q "SELECT count() FROM system.asynchronous_inserts WHERE table = 'minio_audit_logs' OR table = 'minio_server_logs'") -if [[ has_async_inserts -eq 1 ]]; then - echo "Waiting for async inserts to flush" - sleep 5 -fi - +clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 97d644fa4d6..5d941adcb81 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -30,7 +30,9 @@ SELECT count() FROM t_async_inserts_flush; SYSTEM FLUSH ASYNC INSERT QUEUE; -SELECT count() FROM system.asynchronous_inserts; +SELECT count() FROM system.asynchronous_inserts +WHERE database = currentDatabase() AND table = 't_async_inserts_flush'; + SELECT count() FROM t_async_inserts_flush; DROP TABLE t_async_inserts_flush; diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 876766d0780..61bbbd620f0 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -91,5 +91,5 @@ flush1 $TIMEOUT & wait ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts WHERE database = currentDatabase() AND table = 'async_inserts'" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"; From 47f429a52435d84b3e1cee06202e743c11bcba0d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 8 Aug 2024 20:54:29 +0000 Subject: [PATCH 074/132] Proper CMake for libfiu --- CMakeLists.txt | 2 +- contrib/libfiu-cmake/CMakeLists.txt | 27 ++++++++++++++------------- src/CMakeLists.txt | 4 ++-- src/Common/FailPoint.cpp | 6 ++++-- src/Common/FailPoint.h | 9 ++++----- src/Common/config.h.in | 2 +- src/configure_config.cmake | 4 ++-- 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b4e0484ab1..134f3afd727 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -402,7 +402,7 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") set(ENABLE_GWP_ASAN OFF) endif () -option (ENABLE_FIU "Enable Fiu" ON) +option (ENABLE_LIBFIU "Enable libfiu" ON) option(WERROR "Enable -Werror compiler option" ON) diff --git a/contrib/libfiu-cmake/CMakeLists.txt b/contrib/libfiu-cmake/CMakeLists.txt index e805491edbb..eab55087c98 100644 --- a/contrib/libfiu-cmake/CMakeLists.txt +++ b/contrib/libfiu-cmake/CMakeLists.txt @@ -1,20 +1,21 @@ -if (NOT ENABLE_FIU) - message (STATUS "Not using fiu") +if (NOT ENABLE_LIBFIU) + message (STATUS "Not using libfiu") return () endif () -set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") +set(LIBFIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/") -set(FIU_SOURCES - ${FIU_DIR}/libfiu/fiu.c - ${FIU_DIR}/libfiu/fiu-rc.c - ${FIU_DIR}/libfiu/backtrace.c - ${FIU_DIR}/libfiu/wtable.c +set(LIBFIU_SOURCES + ${LIBFIU_DIR}/libfiu/fiu.c + ${LIBFIU_DIR}/libfiu/fiu-rc.c + ${LIBFIU_DIR}/libfiu/backtrace.c + ${LIBFIU_DIR}/libfiu/wtable.c ) -set(FIU_HEADERS "${FIU_DIR}/libfiu") +set(LIBFIU_HEADERS "${LIBFIU_DIR}/libfiu") -add_library(_fiu ${FIU_SOURCES}) -target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE) -target_include_directories(_fiu PUBLIC ${FIU_HEADERS}) -add_library(ch_contrib::fiu ALIAS _fiu) +add_library(_libfiu ${LIBFIU_SOURCES}) +target_compile_definitions(_libfiu PUBLIC DUMMY_BACKTRACE) +target_compile_definitions(_libfiu PUBLIC FIU_ENABLE) +target_include_directories(_libfiu PUBLIC ${LIBFIU_HEADERS}) +add_library(ch_contrib::libfiu ALIAS _libfiu) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98dd0601a1b..db3ef0f489f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -353,8 +353,8 @@ target_link_libraries(clickhouse_common_io Poco::Foundation ) -if (TARGET ch_contrib::fiu) - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::fiu) +if (TARGET ch_contrib::libfiu) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::libfiu) endif() if (TARGET ch_contrib::cpuid) diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 0b1ec552d43..b2fcbc77c56 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -7,6 +7,8 @@ #include #include +#include "config.h" + namespace DB { @@ -15,7 +17,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; }; -#if FIU_ENABLE +#if USE_LIBFIU static struct InitFiu { InitFiu() @@ -135,7 +137,7 @@ void FailPointInjection::pauseFailPoint(const String & fail_point_name) void FailPointInjection::enableFailPoint(const String & fail_point_name) { -#if FIU_ENABLE +#if USE_LIBFIU #define SUB_M(NAME, flags, pause) \ if (fail_point_name == FailPoints::NAME) \ { \ diff --git a/src/Common/FailPoint.h b/src/Common/FailPoint.h index b3e1214d597..1af13d08553 100644 --- a/src/Common/FailPoint.h +++ b/src/Common/FailPoint.h @@ -1,17 +1,16 @@ #pragma once -#include "config.h" #include #include #include +#include "config.h" + #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdocumentation" #pragma clang diagnostic ignored "-Wreserved-macro-identifier" - -#include -#include - +# include +# include #pragma clang diagnostic pop #include diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 6a0090130a3..56a067b06e8 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -59,7 +59,7 @@ #cmakedefine01 USE_SKIM #cmakedefine01 USE_PRQL #cmakedefine01 USE_ULID -#cmakedefine01 FIU_ENABLE +#cmakedefine01 USE_LIBFIU #cmakedefine01 USE_BCRYPT #cmakedefine01 USE_LIBARCHIVE #cmakedefine01 USE_POCKETFFT diff --git a/src/configure_config.cmake b/src/configure_config.cmake index d22bf674df4..721041bc11b 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -161,8 +161,8 @@ endif() if (TARGET ch_contrib::ssh) set(USE_SSH 1) endif() -if (TARGET ch_contrib::fiu) - set(FIU_ENABLE 1) +if (TARGET ch_contrib::libfiu) + set(USE_LIBFIU 1) endif() if (TARGET ch_contrib::libarchive) set(USE_LIBARCHIVE 1) From 30d8e407723f45ed79dc960604d77e8ee02f3edb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 8 Aug 2024 21:02:04 +0000 Subject: [PATCH 075/132] Fix referenced variable for vectorscan in system.build_options --- src/Storages/System/StorageSystemBuildOptions.cpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index a81bcb08bfc..3f84a7468fd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -36,7 +36,7 @@ const char * auto_config_build[] "USE_SSL", "@USE_SSL@", "OPENSSL_VERSION", "@OPENSSL_VERSION@", "OPENSSL_IS_BORING_SSL", "@OPENSSL_IS_BORING_SSL@", - "USE_VECTORSCAN", "@ENABLE_VECTORSCAN@", + "USE_VECTORSCAN", "@USE_VECTORSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_ODBC", "@USE_ODBC@", "USE_GRPC", "@USE_GRPC@", From b242a129f811c9838167d98df56d060ceac24b85 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 8 Aug 2024 21:03:42 +0000 Subject: [PATCH 076/132] Fix referenced variable for jemalloc in system.build_options --- src/Storages/System/StorageSystemBuildOptions.cpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 3f84a7468fd..6cecef5a7ad 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -21,7 +21,7 @@ const char * auto_config_build[] "BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@", "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", - "USE_JEMALLOC", "@ENABLE_JEMALLOC@", + "USE_JEMALLOC", "@USE_JEMALLOC@", "USE_ICU", "@USE_ICU@", "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", From a497a2391455de21dec19b365cc939defdc56b1e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Aug 2024 08:33:11 +0000 Subject: [PATCH 077/132] Fix CMake for QPL --- contrib/qpl-cmake/CMakeLists.txt | 4 ---- src/CMakeLists.txt | 2 ++ src/Common/config.h.in | 1 + .../CompressionCodecDeflateQpl.cpp | 8 +++---- src/Compression/CompressionCodecDeflateQpl.h | 6 +++++ src/Compression/CompressionFactory.cpp | 22 +++++++++---------- .../System/StorageSystemBuildOptions.cpp.in | 2 +- src/configure_config.cmake | 3 +++ 8 files changed, 28 insertions(+), 20 deletions(-) diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt index e62612cff5a..89332ae0f7a 100644 --- a/contrib/qpl-cmake/CMakeLists.txt +++ b/contrib/qpl-cmake/CMakeLists.txt @@ -728,10 +728,6 @@ add_library(_qpl STATIC ${LIB_DEPS}) target_include_directories(_qpl PUBLIC $ $) - -target_compile_definitions(_qpl - PUBLIC -DENABLE_QPL_COMPRESSION) - target_link_libraries(_qpl PRIVATE ch_contrib::accel-config) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index db3ef0f489f..eba04d93df5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -556,6 +556,8 @@ target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) if (TARGET ch_contrib::qpl) dbms_target_link_libraries(PUBLIC ch_contrib::qpl) + target_link_libraries (clickhouse_compression PUBLIC ch_contrib::qpl) + target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config) endif () if (TARGET ch_contrib::accel-config) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 56a067b06e8..1680cde22a2 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -32,6 +32,7 @@ #cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN +#cmakedefine01 USE_QPL #cmakedefine01 USE_LIBURING #cmakedefine01 USE_AVRO #cmakedefine01 USE_CAPNP diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index f1b5b24e866..c82ee861a6f 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -1,7 +1,3 @@ -#ifdef ENABLE_QPL_COMPRESSION - -#include -#include #include #include #include @@ -11,6 +7,10 @@ #include #include #include +#include +#include + +#if USE_QPL #include "libaccel_config.h" diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h index 86fd9051bd8..d9abc0fb7e0 100644 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ b/src/Compression/CompressionCodecDeflateQpl.h @@ -4,6 +4,11 @@ #include #include #include + +#include "config.h" + +#if USE_QPL + #include namespace Poco @@ -117,3 +122,4 @@ private: }; } +#endif diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 2e7aa0d086f..fbb5664d441 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -1,20 +1,20 @@ -#include "config.h" - #include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include #include -#include -#include -#include +#include +#include +#include #include +#include "config.h" + namespace DB { @@ -179,7 +179,7 @@ void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecZSTDQAT(CompressionCodecFactory & factory); #endif void registerCodecMultiple(CompressionCodecFactory & factory); -#ifdef ENABLE_QPL_COMPRESSION +#if USE_QPL void registerCodecDeflateQpl(CompressionCodecFactory & factory); #endif @@ -209,7 +209,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); -#ifdef ENABLE_QPL_COMPRESSION +#if USE_QPL registerCodecDeflateQpl(*this); #endif registerCodecGCD(*this); diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 6cecef5a7ad..f7edfb17542 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -62,7 +62,7 @@ const char * auto_config_build[] "USE_ARROW", "@USE_ARROW@", "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", - "USE_QPL", "@ENABLE_QPL@", + "USE_QPL", "@USE_QPL@", "USE_QAT", "@ENABLE_QATLIB@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 721041bc11b..6782cc6d824 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -135,6 +135,9 @@ endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() +if (TARGET ch_contrib::qpl) + set(USE_QPL 1) +endif() if (TARGET ch_contrib::avrocpp) set(USE_AVRO 1) endif() From eec5fe087c273aba443d97824da0e7aadfd52cdd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Aug 2024 08:50:52 +0000 Subject: [PATCH 078/132] Fix CMake for QATlib --- contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt | 4 ++-- src/CMakeLists.txt | 7 ++----- src/Common/config.h.in | 1 + src/Compression/CompressionCodecZSTDQAT.cpp | 5 ++++- src/Compression/CompressionFactory.cpp | 4 ++-- src/Storages/System/StorageSystemBuildOptions.cpp.in | 2 +- src/configure_config.cmake | 3 +++ 7 files changed, 15 insertions(+), 11 deletions(-) diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt index 72d21a8572b..fc18092f574 100644 --- a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt +++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt @@ -27,7 +27,7 @@ if (ENABLE_QAT_OUT_OF_TREE_BUILD) ${QAT_AL_INCLUDE_DIR} ${QAT_USDM_INCLUDE_DIR} ${ZSTD_LIBRARY_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0) add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) else () # In-tree build message(STATUS "Intel QATZSTD in-tree build") @@ -78,7 +78,7 @@ else () # In-tree build ${QAT_USDM_INCLUDE_DIR} ${ZSTD_LIBRARY_DIR} ${LIBQAT_HEADER_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DENABLE_ZSTD_QAT_CODEC -DINTREE) + target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) endif () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eba04d93df5..43092d10dd2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -560,12 +560,9 @@ if (TARGET ch_contrib::qpl) target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config) endif () -if (TARGET ch_contrib::accel-config) - dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) -endif () - -if (TARGET ch_contrib::qatzstd_plugin) +if (TARGET ch_contrib::accel-config AND TARGET ch_contrib::qatzstd_plugin) dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin) + dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin) endif () diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 1680cde22a2..e3f8882850f 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -33,6 +33,7 @@ #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN #cmakedefine01 USE_QPL +#cmakedefine01 USE_QATLIB #cmakedefine01 USE_LIBURING #cmakedefine01 USE_AVRO #cmakedefine01 USE_CAPNP diff --git a/src/Compression/CompressionCodecZSTDQAT.cpp b/src/Compression/CompressionCodecZSTDQAT.cpp index 5a4ef70a30a..e19b7e4a001 100644 --- a/src/Compression/CompressionCodecZSTDQAT.cpp +++ b/src/Compression/CompressionCodecZSTDQAT.cpp @@ -1,4 +1,6 @@ -#ifdef ENABLE_ZSTD_QAT_CODEC +#include "config.h" + +#if USE_QATLIB #include #include @@ -6,6 +8,7 @@ #include #include #include + #include #include diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index fbb5664d441..ac00f571568 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -175,7 +175,7 @@ void registerCodecNone(CompressionCodecFactory & factory); void registerCodecLZ4(CompressionCodecFactory & factory); void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecZSTD(CompressionCodecFactory & factory); -#ifdef ENABLE_ZSTD_QAT_CODEC +#if USE_QATLIB void registerCodecZSTDQAT(CompressionCodecFactory & factory); #endif void registerCodecMultiple(CompressionCodecFactory & factory); @@ -198,7 +198,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecNone(*this); registerCodecLZ4(*this); registerCodecZSTD(*this); -#ifdef ENABLE_ZSTD_QAT_CODEC +#if USE_QATLIB registerCodecZSTDQAT(*this); #endif registerCodecLZ4HC(*this); diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index f7edfb17542..9e5adbfe825 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -63,7 +63,7 @@ const char * auto_config_build[] "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", "USE_QPL", "@USE_QPL@", - "USE_QAT", "@ENABLE_QATLIB@", + "USE_QATLIB", "@USE_QATLIB@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 6782cc6d824..5b24f79ef6e 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -138,6 +138,9 @@ endif() if (TARGET ch_contrib::qpl) set(USE_QPL 1) endif() +if (TARGET ch_contrib::qatlib) + set(USE_QATLIB 1) +endif() if (TARGET ch_contrib::avrocpp) set(USE_AVRO 1) endif() From 759299910c2892b809735caaf663fe374c315c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 9 Aug 2024 09:14:00 +0000 Subject: [PATCH 079/132] Force new analyzer for test --- tests/queries/0_stateless/03217_filtering_in_storage_merge.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql index 5ecc1e7c672..42d31e95f9c 100644 --- a/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql +++ b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql @@ -13,4 +13,4 @@ INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10); SYSTEM SYNC REPLICA test_03217_merge_replica_2; -- If the filter on _table is not applied, then the plan will show both replicas -EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table; +EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table SETTINGS allow_experimental_analyzer=1; From dc64550536ff249b1c12070ed646bc4321bc68bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 9 Aug 2024 09:14:13 +0000 Subject: [PATCH 080/132] Remove wrong check from test --- .../0_stateless/03217_filtering_in_system_tables.reference | 4 ++-- .../0_stateless/03217_filtering_in_system_tables.sql | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference index 218fddf92e0..d7ccd989f53 100644 --- a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference @@ -1,4 +1,4 @@ information_schema tables default test_03217_system_tables_replica_1 r1 -1 1 -1 1 +1 +1 diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql index 72ca7c8684d..0db846bc500 100644 --- a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -10,18 +10,17 @@ CREATE TABLE test_03217_system_tables_replica_2(x UInt32) -- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name = 'r1'; - SYSTEM FLUSH LOGS; --- argMin-argMax is necessary to make the test repeatable +-- argMax is necessary to make the test repeatable -- StorageSystemTables -SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 +SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 AND current_database = currentDatabase() AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';' AND type = 'QueryFinish'; -- StorageSystemReplicas -SELECT argMin(read_rows, event_time_microseconds), argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 +SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 AND current_database = currentDatabase() AND query LIKE '%SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name = \'r1\';' AND type = 'QueryFinish'; From e4903858c8ae108b87b726a8056acab10dd6b851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 9 Aug 2024 09:31:33 +0000 Subject: [PATCH 081/132] Add extra check to make sure both replicas are present in system.replicas --- .../0_stateless/03217_filtering_in_system_tables.reference | 2 ++ tests/queries/0_stateless/03217_filtering_in_system_tables.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference index d7ccd989f53..c0761c3f689 100644 --- a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference @@ -1,4 +1,6 @@ information_schema tables +both default test_03217_system_tables_replica_1 r1 +both default test_03217_system_tables_replica_2 r2 default test_03217_system_tables_replica_1 r1 1 1 diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql index 0db846bc500..2ce63559b99 100644 --- a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -8,6 +8,8 @@ CREATE TABLE test_03217_system_tables_replica_2(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r2') ORDER BY x; +-- Make sure we can read both replicas +SELECT 'both', database, table, replica_name FROM system.replicas WHERE database = currentDatabase(); -- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name = 'r1'; SYSTEM FLUSH LOGS; From da907d535623422b7beb1d8cf3e7389698567e28 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Aug 2024 10:37:48 +0100 Subject: [PATCH 082/132] Better parsing --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index cd2a61cce4e..c59d36114ae 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -178,7 +178,7 @@ attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 202 clickhouse-client --query "CREATE TABLE minio_audit_logs ( log String, - event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(substring(JSONExtractRaw(log, 'time'), 2, 29), 9, 'UTC') + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') ) ENGINE = MergeTree ORDER BY tuple()" @@ -186,7 +186,7 @@ ORDER BY tuple()" clickhouse-client --query "CREATE TABLE minio_server_logs ( log String, - event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(substring(JSONExtractRaw(log, 'time'), 2, 29), 9, 'UTC') + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') ) ENGINE = MergeTree ORDER BY tuple()" From a5e06c7c311f142aa8d790cf398af19d86cbce4f Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Fri, 9 Aug 2024 11:06:11 +0200 Subject: [PATCH 083/132] Fix UB in hopEnd and hopStart This was causing segfaults because of a NULL pointer dereference --- src/Functions/FunctionsTimeWindow.cpp | 7 ++++++- .../01049_window_view_window_functions.reference | 2 ++ .../0_stateless/01049_window_view_window_functions.sql | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 77d740803be..faea9c6ba58 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -622,7 +622,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index 2d49664b280..47d1ccc57dd 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -67,3 +67,5 @@ SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 2020-01-10 00:00:00 SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2019-01-10 00:00:00 +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 617019bd2c6..3638dd1a3b2 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -36,3 +36,6 @@ SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, I SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); + +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } From 2657e2b3ef2f98802fd2b8ebcd359fe756b709c6 Mon Sep 17 00:00:00 2001 From: Graham Campbell Date: Fri, 9 Aug 2024 11:08:41 +0100 Subject: [PATCH 084/132] Do not apply redundant sorting removal when there's an offset --- .../Optimizations/removeRedundantSorting.cpp | 8 ++-- .../02496_remove_redundant_sorting.reference | 37 +++++++++++++++++ .../02496_remove_redundant_sorting.sh | 22 ++++++++++ ...emove_redundant_sorting_analyzer.reference | 41 +++++++++++++++++++ 4 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 7cac7bee6ec..f0094f0f8d2 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -59,9 +60,10 @@ public: if (typeid_cast(current_step) || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable - || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting - || typeid_cast(current_step)) /// (4) aggregation change order + || typeid_cast(current_step) /// (2) OFFSET on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (3) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (4) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (5) aggregation change order { logStep("nodes_affect_order/push", current_node); nodes_affect_order.push_back(current_node); diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..a0a1fd60812 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -527,3 +527,40 @@ Expression (Projection) 2 4 1 3 0 2 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT SUM(a) AS a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + GROUP BY 2 + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression (Projection) + Expression (Before ORDER BY) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Before ORDER BY) + Aggregating + Expression (Before GROUP BY) + ReadFromStorage (Values) +-- execute +0 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 646e2501a99..d59b4387101 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -329,3 +329,25 @@ FROM ORDER BY number DESC )" run_query "$query" + +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT SUM(a) AS a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + GROUP BY 2 + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2" +run_query "$query" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index b6a2e3182df..58441de5f22 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -533,3 +533,44 @@ Expression (Project names) 2 4 1 3 0 2 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT SUM(a) AS a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + GROUP BY 2 + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression (Project names) + Expression (Projection) + Aggregating + Expression (Before GROUP BY) + Expression (Change column names to column identifiers) + Expression (Project names) + Offset + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Expression (Projection) + Expression (Change column names to column identifiers) + Expression (Project names) + Expression (Projection) + Aggregating + Expression (Before GROUP BY) + Expression (Change column names to column identifiers) + ReadFromStorage (Values) +-- execute +0 From 4ced1f37e8e478ef806bd623a411916c18169dfe Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Aug 2024 11:56:53 +0100 Subject: [PATCH 085/132] Escape quote --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c59d36114ae..acdcda753d7 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -178,7 +178,7 @@ attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 202 clickhouse-client --query "CREATE TABLE minio_audit_logs ( log String, - event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') ) ENGINE = MergeTree ORDER BY tuple()" @@ -186,7 +186,7 @@ ORDER BY tuple()" clickhouse-client --query "CREATE TABLE minio_server_logs ( log String, - event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') + event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC') ) ENGINE = MergeTree ORDER BY tuple()" From 36c0c4562b8622b84012a12e29175f272bda2b0b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Aug 2024 13:14:30 +0200 Subject: [PATCH 086/132] Fix race in WithRetries --- src/Backups/WithRetries.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Backups/WithRetries.cpp b/src/Backups/WithRetries.cpp index 181e6331ac9..9f22085f5a9 100644 --- a/src/Backups/WithRetries.cpp +++ b/src/Backups/WithRetries.cpp @@ -68,13 +68,19 @@ const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const { - /// We need to create new instance of ZooKeeperWithFaultInjection each time a copy a pointer to ZooKeeper client there + zkutil::ZooKeeperPtr current_zookeeper; + { + std::lock_guard lock(zookeeper_mutex); + current_zookeeper = zookeeper; + } + + /// We need to create new instance of ZooKeeperWithFaultInjection each time and copy a pointer to ZooKeeper client there /// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition /// when the same object is used from multiple threads. auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance( settings.keeper_fault_injection_probability, settings.keeper_fault_injection_seed, - zookeeper, + current_zookeeper, log->name(), log); From 1e3ccbc3ec81c5b9d79a034159181f1f6bdb195c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 9 Aug 2024 11:22:44 +0000 Subject: [PATCH 087/132] add perf test for subcolumns --- .../optimize_functions_to_subcolumns.xml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/performance/optimize_functions_to_subcolumns.xml diff --git a/tests/performance/optimize_functions_to_subcolumns.xml b/tests/performance/optimize_functions_to_subcolumns.xml new file mode 100644 index 00000000000..a246aae7950 --- /dev/null +++ b/tests/performance/optimize_functions_to_subcolumns.xml @@ -0,0 +1,27 @@ + + + 1 + 4 + + + + CREATE TABLE t_subcolumns (a Array(UInt64), s Nullable(String), m Map(String, UInt64)) ENGINE = MergeTree ORDER BY tuple() + + + + INSERT INTO t_subcolumns SELECT range(number % 20), toString(number), mapFromArrays(range(number % 20), range(number % 20)) FROM numbers_mt(50000000) + + + + OPTIMIZE TABLE t_subcolumns FINAL + + + SELECT count() FROM t_subcolumns WHERE NOT ignore(length(a)) + SELECT count() FROM t_subcolumns WHERE notEmpty(a) + SELECT count() FROM t_subcolumns WHERE NOT ignore(length(m)) + SELECT count() FROM t_subcolumns WHERE notEmpty(m) + SELECT count() FROM t_subcolumns WHERE isNotNull(s) + SELECT count(s) FROM t_subcolumns + + DROP TABLE t_subcolumns + From e8f2f65e62c65878463879396b7ebdceed48c5e3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Aug 2024 11:51:23 +0000 Subject: [PATCH 088/132] Avoid converting type to string and back in _CAST --- src/Functions/CastOverloadResolver.cpp | 23 +++++++++++++++---- src/Functions/CastOverloadResolver.h | 6 ++++- src/Functions/toBool.cpp | 3 +-- src/Interpreters/ActionsDAG.cpp | 12 +++++----- src/Interpreters/castColumn.cpp | 6 ++--- .../optimizeUseAggregateProjection.cpp | 21 ++++++----------- src/Processors/Transforms/WindowTransform.cpp | 17 ++------------ src/Storages/MergeTree/KeyCondition.cpp | 7 ++---- .../MergeTreeSplitPrewhereIntoReadSteps.cpp | 20 +++++----------- ...rojection_with_normalized_states.reference | 1 + ...gate_projection_with_normalized_states.sql | 2 ++ 11 files changed, 52 insertions(+), 66 deletions(-) diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 49f63073aaf..6cb4d492fd8 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ FunctionBasePtr createFunctionBaseCast( class CastOverloadResolverImpl : public IFunctionOverloadResolver { public: - const char * getNameImpl() const + static const char * getNameImpl(CastType cast_type, bool internal) { if (cast_type == CastType::accurate) return "accurateCast"; @@ -49,7 +50,7 @@ public: String getName() const override { - return getNameImpl(); + return getNameImpl(cast_type, internal); } size_t getNumberOfArguments() const override { return 2; } @@ -79,10 +80,22 @@ public: } } + static FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional diagnostic) + { + if (cast_type == CastType::accurateOrNull && !isVariant(to)) + to = makeNullable(to); + + ColumnsWithTypeAndName arguments; + arguments.emplace_back(std::move(from)); + arguments.emplace_back().type = std::make_unique(); + + return createFunctionBaseCast(nullptr, getNameImpl(cast_type, true), arguments, to, diagnostic, cast_type); + } + protected: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { - return createFunctionBaseCast(context, getNameImpl(), arguments, return_type, diagnostic, cast_type); + return createFunctionBaseCast(context, getNameImpl(cast_type, internal), arguments, return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -130,9 +143,9 @@ private: }; -FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) +FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional diagnostic) { - return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic); + return CastOverloadResolverImpl::createInternalCast(std::move(from), std::move(to), cast_type, std::move(diagnostic)); } REGISTER_FUNCTION(CastOverloadResolvers) diff --git a/src/Functions/CastOverloadResolver.h b/src/Functions/CastOverloadResolver.h index 7d98f774812..66f9d6cfcaf 100644 --- a/src/Functions/CastOverloadResolver.h +++ b/src/Functions/CastOverloadResolver.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,6 +12,9 @@ namespace DB class IFunctionOverloadResolver; using FunctionOverloadResolverPtr = std::shared_ptr; +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + enum class CastType : uint8_t { nonAccurate, @@ -24,6 +28,6 @@ struct CastDiagnostic std::string column_to; }; -FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic); +FunctionBasePtr createInternalCast(ColumnWithTypeAndName from, DataTypePtr to, CastType cast_type, std::optional diagnostic); } diff --git a/src/Functions/toBool.cpp b/src/Functions/toBool.cpp index 6f2c436c1ea..ac595d313e3 100644 --- a/src/Functions/toBool.cpp +++ b/src/Functions/toBool.cpp @@ -54,8 +54,7 @@ namespace } }; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); - auto func_cast = func_builder_cast->build(cast_args); + auto func_cast = createInternalCast(arguments[0], result_type, CastType::nonAccurate, {}); return func_cast->execute(cast_args, result_type, arguments[0].column->size()); } }; diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index df1c0aa1f2a..2a594839c6a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -301,11 +301,11 @@ const ActionsDAG::Node & ActionsDAG::addCast(const Node & node_to_cast, const Da column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); column.type = std::make_shared(); - const auto * cast_type_constant_node = &addColumn(std::move(column)); + const auto * cast_type_constant_node = &addColumn(column); ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node}; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); + auto func_base_cast = createInternalCast(ColumnWithTypeAndName{node_to_cast.result_type, node_to_cast.result_name}, cast_type, CastType::nonAccurate, {}); - return addFunction(func_builder_cast, std::move(children), result_name); + return addFunction(func_base_cast, std::move(children), result_name); } const ActionsDAG::Node & ActionsDAG::addFunctionImpl( @@ -1547,11 +1547,11 @@ ActionsDAG ActionsDAG::makeConvertingActions( const auto * left_arg = dst_node; CastDiagnostic diagnostic = {dst_node->result_name, res_elem.name}; - FunctionOverloadResolverPtr func_builder_cast - = createInternalCastOverloadResolver(CastType::nonAccurate, std::move(diagnostic)); + ColumnWithTypeAndName left_column{nullptr, dst_node->result_type, {}}; + auto func_base_cast = createInternalCast(std::move(left_column), res_elem.type, CastType::nonAccurate, std::move(diagnostic)); NodeRawConstPtrs children = { left_arg, right_arg }; - dst_node = &actions_dag.addFunction(func_builder_cast, std::move(children), {}); + dst_node = &actions_dag.addFunction(func_base_cast, std::move(children), {}); } if (dst_node->column && isColumnConst(*dst_node->column) && !(res_elem.column && isColumnConst(*res_elem.column))) diff --git a/src/Interpreters/castColumn.cpp b/src/Interpreters/castColumn.cpp index 906dfb84b14..a779c9bc34d 100644 --- a/src/Interpreters/castColumn.cpp +++ b/src/Interpreters/castColumn.cpp @@ -26,11 +26,9 @@ static ColumnPtr castColumn(CastType cast_type, const ColumnWithTypeAndName & ar "" } }; - auto get_cast_func = [cast_type, &arguments] + auto get_cast_func = [from = arg, to = type, cast_type] { - - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(cast_type, {}); - return func_builder_cast->build(arguments); + return createInternalCast(from, to, cast_type, {}); }; FunctionBasePtr func_cast = cache ? cache->getOrSet(cast_type, from_name, to_name, std::move(get_cast_func)) : get_cast_func(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 52d1931c51e..b31ee7ea53c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -255,20 +255,13 @@ static void appendAggregateFunctions( const auto * node = input; - if (node->result_name != aggregate.column_name) - { - if (DataTypeAggregateFunction::strictEquals(type, node->result_type)) - { - node = &proj_dag.addAlias(*node, aggregate.column_name); - } - else - { - /// Cast to aggregate types specified in query if it's not - /// strictly the same as the one specified in projection. This - /// is required to generate correct results during finalization. - node = &proj_dag.addCast(*node, type, aggregate.column_name); - } - } + if (!DataTypeAggregateFunction::strictEquals(type, node->result_type)) + /// Cast to aggregate types specified in query if it's not + /// strictly the same as the one specified in projection. This + /// is required to generate correct results during finalization. + node = &proj_dag.addCast(*node, type, aggregate.column_name); + else if (node->result_name != aggregate.column_name) + node = &proj_dag.addAlias(*node, aggregate.column_name); proj_dag_outputs.push_back(node); } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index c26cd7cc8c3..c27c230c741 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -2337,22 +2337,9 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction argument_types[2]->getName()); } - const auto from_name = argument_types[2]->getName(); - const auto to_name = argument_types[0]->getName(); - ColumnsWithTypeAndName arguments + auto get_cast_func = [from = argument_types[2], to = argument_types[0]] { - { argument_types[2], "" }, - { - DataTypeString().createColumnConst(0, to_name), - std::make_shared(), - "" - } - }; - - auto get_cast_func = [&arguments] - { - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::accurate, {}); - return func_builder_cast->build(arguments); + return createInternalCast({from, {}}, to, CastType::accurate, {}); }; func_cast = get_cast_func(); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index dfb43c4e75d..aa7a498d5a3 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1956,11 +1956,8 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme auto common_type_maybe_nullable = (key_expr_type_is_nullable && !common_type->isNullable()) ? DataTypePtr(std::make_shared(common_type)) : common_type; - ColumnsWithTypeAndName arguments{ - {nullptr, key_expr_type, ""}, - {DataTypeString().createColumnConst(1, common_type_maybe_nullable->getName()), common_type_maybe_nullable, ""}}; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); - auto func_cast = func_builder_cast->build(arguments); + + auto func_cast = createInternalCast({key_expr_type, {}}, common_type_maybe_nullable, CastType::nonAccurate, {}); /// If we know the given range only contains one value, then we treat all functions as positive monotonic. if (!single_point && !func_cast->hasInformationAboutMonotonicity()) diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 36ff6c0a4bd..9c82817e8cb 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -152,23 +152,15 @@ const ActionsDAG::Node & addFunction( const ActionsDAG::Node & addCast( const ActionsDAGPtr & dag, const ActionsDAG::Node & node_to_cast, - const String & type_name, + const DataTypePtr & to_type, OriginalToNewNodeMap & node_remap) { - if (node_to_cast.result_type->getName() == type_name) + if (!node_to_cast.result_type->equals(*to_type)) return node_to_cast; - Field cast_type_constant_value(type_name); - - ColumnWithTypeAndName column; - column.column = DataTypeString().createColumnConst(0, cast_type_constant_value); - column.type = std::make_shared(); - - const auto * cast_type_constant_node = &dag->addColumn(std::move(column)); - ActionsDAG::NodeRawConstPtrs children = {&node_to_cast, cast_type_constant_node}; - FunctionOverloadResolverPtr func_builder_cast = createInternalCastOverloadResolver(CastType::nonAccurate, {}); - - return addFunction(dag, func_builder_cast, std::move(children), node_remap); + const auto & new_node = dag->addCast(node_to_cast, to_type, {}); + node_remap[new_node.result_name] = {dag.get(), &new_node}; + return new_node; } /// Normalizes the filter node by adding AND with a constant true. @@ -332,7 +324,7 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction /// Build AND(last_step_result_node, true) const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap); /// Build CAST(and_node, type of PREWHERE column) - const auto & cast_node = addCast(last_step_dag, and_node, output->result_type->getName(), node_remap); + const auto & cast_node = addCast(last_step_dag, and_node, output->result_type, node_remap); /// Add alias for the result with the name of the PREWHERE column const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name); last_step_dag->addOrReplaceInOutputs(prewhere_result_node); diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference index 25aa9dc5dec..37993873983 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.reference @@ -1,2 +1,3 @@ 3 950 990 500 2000 +[950] [999] diff --git a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql index 5375823aa8e..956bf3711a2 100644 --- a/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql +++ b/tests/queries/0_stateless/01710_aggregate_projection_with_normalized_states.sql @@ -29,4 +29,6 @@ FROM cluster('test_cluster_two_shards', currentDatabase(), r) WHERE a = 'x' settings prefer_localhost_replica=0; +SELECT quantilesTimingMerge(0.95)(q), quantilesTimingMerge(toInt64(1))(q) FROM remote('127.0.0.{1,2}', currentDatabase(), r); + DROP TABLE r; From 3d850f8ceb0ca5cfae26e8faa7c4d900cc4e8fda Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 9 Aug 2024 13:58:02 +0200 Subject: [PATCH 089/132] fix --- src/Processors/Sources/ShellCommandSource.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 23359367a9b..f55a3713215 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -70,17 +70,16 @@ static void makeFdBlocking(int fd) static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds) { + auto logger = getLogger("TimeoutReadBufferFromFileDescriptor"); + auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; + int res; while (true) { Stopwatch watch; - auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; - LOG_TEST( - getLogger("TimeoutReadBufferFromFileDescriptor"), - "Polling descriptors: {}", - fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", ")); + LOG_TEST(logger, "Polling descriptors: {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", ")); res = poll(pfds, static_cast(num), static_cast(timeout_milliseconds)); @@ -92,11 +91,7 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond const auto elapsed = watch.elapsedMilliseconds(); if (timeout_milliseconds <= elapsed) { - LOG_TEST( - getLogger("TimeoutReadBufferFromFileDescriptor"), - "Timeout exceeded: elapsed={}, timeout={}", - elapsed, - timeout_milliseconds); + LOG_TEST(logger, "Timeout exceeded: elapsed={}, timeout={}", elapsed, timeout_milliseconds); break; } timeout_milliseconds -= elapsed; @@ -107,9 +102,8 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond } } - auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); }; LOG_TEST( - getLogger("TimeoutReadBufferFromFileDescriptor"), + logger, "Poll for descriptors: {} returned {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "), res); From 1875e8d9cd581871c247e3a4cc58f1f57ffa0659 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Fri, 9 Aug 2024 14:06:52 +0200 Subject: [PATCH 090/132] Fix UB in tumbleEnd and tumbleStart This was causing segfaults because of a NULL pointer dereference --- src/Functions/FunctionsTimeWindow.cpp | 7 ++++++- .../01049_window_view_window_functions.reference | 2 ++ .../0_stateless/01049_window_view_window_functions.sql | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index faea9c6ba58..88b85c48326 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -267,7 +267,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index 47d1ccc57dd..073301104d2 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -69,3 +69,5 @@ SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DA 2019-01-10 00:00:00 SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 3638dd1a3b2..fb2b4b4949a 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -39,3 +39,5 @@ SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DA SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } From 20563bc6cbc6e70eb6926c5f21fded356270f40f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 9 Aug 2024 12:19:44 +0000 Subject: [PATCH 091/132] Make test work with ReplicatedDatabase in test --- .../0_stateless/03217_filtering_in_system_tables.sql | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql index 2ce63559b99..eb506dfe39a 100644 --- a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -9,9 +9,11 @@ CREATE TABLE test_03217_system_tables_replica_2(x UInt32) ORDER BY x; -- Make sure we can read both replicas -SELECT 'both', database, table, replica_name FROM system.replicas WHERE database = currentDatabase(); +-- The replica name might be altered because of `_functional_tests_helper_database_replicated_replace_args_macros`, +-- thus we need to use `left` +SELECT 'both', database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase(); -- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables -SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name = 'r1'; +SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name LIKE 'r1%'; SYSTEM FLUSH LOGS; -- argMax is necessary to make the test repeatable @@ -24,5 +26,5 @@ SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 -- StorageSystemReplicas SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 AND current_database = currentDatabase() - AND query LIKE '%SELECT database, table, replica_name FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name = \'r1\';' + AND query LIKE '%SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name LIKE \'r1\%\';' AND type = 'QueryFinish'; From 96b54df1638043e27887099df5f7cf310d1e7fa4 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 9 Aug 2024 15:04:03 +0200 Subject: [PATCH 092/132] fix bugprone-macro-parentheses --- src/Interpreters/SystemLog.cpp | 4 +++- src/Interpreters/SystemLog.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index d4403b72583..832c39bfaf8 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -284,11 +284,13 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConfiguration & config) { +/// NOLINTBEGIN(bugprone-macro-parentheses) #define CREATE_PUBLIC_MEMBERS(log_type, member, descr) \ member = createSystemLog(global_context, "system", #member, config, #member, descr); \ LIST_OF_ALL_SYSTEM_LOGS(CREATE_PUBLIC_MEMBERS) #undef CREATE_PUBLIC_MEMBERS +/// NOLINTEND(bugprone-macro-parentheses) if (session_log) global_context->addWarningMessage("Table system.session_log is enabled. It's unreliable and may contain garbage. Do not use it for any kind of security monitoring."); @@ -333,7 +335,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf std::vector SystemLogs::getAllLogs() const { #define GET_RAW_POINTERS(log_type, member, descr) \ - member.get(), \ + (member).get(), \ std::vector result = { LIST_OF_ALL_SYSTEM_LOGS(GET_RAW_POINTERS) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 24ef6a18eb8..9e1af3578bd 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -60,11 +60,13 @@ namespace DB }; */ +/// NOLINTBEGIN(bugprone-macro-parentheses) #define FORWARD_DECLARATION(log_type, member, descr) \ class log_type; \ LIST_OF_ALL_SYSTEM_LOGS(FORWARD_DECLARATION) #undef FORWARD_DECLARATION +/// NOLINTEND(bugprone-macro-parentheses) /// System logs should be destroyed in destructor of the last Context and before tables, From 21be195dd8f841c3cfaf453bb7faba02627b9c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 9 Aug 2024 13:14:09 +0000 Subject: [PATCH 093/132] Revert unnecessary change --- .../clickhouse_path/format_schemas/test.capnp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp index 247e7b9ceca..44f1961205b 100644 --- a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/test.capnp @@ -7,4 +7,4 @@ struct TestRecordStruct val1 @2 : Text; val2 @3 : Float32; val3 @4 : UInt8; -} +} \ No newline at end of file From 65ebcd6f21b26144cb47e6b71c939517b1fb38a2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 9 Aug 2024 13:55:47 +0000 Subject: [PATCH 094/132] Fixing test. --- .../0_stateless/01656_test_query_log_factories_info.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference index 47b3133ceca..44531c19ab7 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference @@ -17,7 +17,7 @@ used_functions ['repeat'] arraySort(used_data_type_families) -['Array','Int32','Nullable','String'] +['Int32','Nullable','String'] used_database_engines ['Atomic'] From 79cfffbaf88c3c34213d5144fd20f7888b6cabe0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 9 Aug 2024 15:57:16 +0100 Subject: [PATCH 095/132] Even bigger queue --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index acdcda753d7..a030be92506 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -193,8 +193,8 @@ ORDER BY tuple()" # create minio log webhooks for both audit and server logs # use async inserts to avoid creating too many parts -./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=300000 -./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=300000 +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 max_retries=100 retry=1 From c13d348d1e8a467b9d16fc83214ef574752092e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 19:56:50 +0200 Subject: [PATCH 096/132] Fix test `00900_long_parquet_load` --- tests/queries/0_stateless/00900_long_parquet_load.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00900_long_parquet_load.sh b/tests/queries/0_stateless/00900_long_parquet_load.sh index 1bafb033f56..3a7022ac0cf 100755 --- a/tests/queries/0_stateless/00900_long_parquet_load.sh +++ b/tests/queries/0_stateless/00900_long_parquet_load.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-debug +# Tags: long, no-fasttest, no-debug, no-asan, no-msan, no-tsan # # Load all possible .parquet files found in submodules. From b5afddb1af0a9aeb4738cf3fb7b7242361469028 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 9 Aug 2024 22:56:25 +0200 Subject: [PATCH 097/132] Update optimize_functions_to_subcolumns.xml --- tests/performance/optimize_functions_to_subcolumns.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/optimize_functions_to_subcolumns.xml b/tests/performance/optimize_functions_to_subcolumns.xml index a246aae7950..146af1605c4 100644 --- a/tests/performance/optimize_functions_to_subcolumns.xml +++ b/tests/performance/optimize_functions_to_subcolumns.xml @@ -1,6 +1,5 @@ - 1 4 From 9b9fff4232d80e579b1d23ced8bfbb1b2c5e2147 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Sat, 10 Aug 2024 08:48:08 +0200 Subject: [PATCH 098/132] Push CI From e582118544f3c49c3c6600ac8fa252151714d25f Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 10 Aug 2024 13:09:40 +0200 Subject: [PATCH 099/132] review changes --- .../functions/type-conversion-functions.md | 916 +++++++++++++++--- 1 file changed, 801 insertions(+), 115 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 8e72fea7fdb..5c06e72f977 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -64,9 +64,8 @@ toInt8(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -251,9 +250,8 @@ toInt8OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int8` is unsuccessful. [Int8](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -314,9 +312,8 @@ toInt16(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -501,9 +498,8 @@ toInt16OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int16` is unsuccessful. [Int16](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -564,9 +560,8 @@ toInt32(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -750,9 +745,8 @@ toInt32OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int32` is unsuccessful. [Int32](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -813,9 +807,8 @@ toInt64(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported types: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1000,9 +993,8 @@ toInt64OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int64` is unsuccessful. [Int64](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1063,9 +1055,8 @@ toInt128(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1312,9 +1303,8 @@ toInt256(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1498,9 +1488,8 @@ toInt256OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `Int256` is unsuccessful. [Int256](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf` @@ -1561,9 +1550,8 @@ toUInt8(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1748,9 +1736,8 @@ toUInt8OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt8` is unsuccessful. [UInt8](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1811,9 +1798,8 @@ toUInt16(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -1998,9 +1984,8 @@ toUInt16OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt16` is unsuccessful. [UInt16](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2061,9 +2046,8 @@ toUInt32(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2249,9 +2233,8 @@ toUInt32OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt32` is unsuccessful. [UInt32](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2312,9 +2295,8 @@ toUInt64(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported types: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2499,9 +2481,8 @@ toUInt64OrDefault(expr[, default]) - `defauult` (optional) — The default value to return if parsing to type `UInt64` is unsuccessful. [UInt64](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2562,9 +2543,8 @@ toUInt128(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2811,9 +2791,8 @@ toUInt256(expr) - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Unsupported arguments: - String representations of Float32/64 values, including `NaN` and `Inf`. @@ -2997,9 +2976,8 @@ toUInt256OrDefault(expr[, default]) - `default` (optional) — The default value to return if parsing to type `UInt256` is unsuccessful. [UInt256](../data-types/int-uint.md). Supported arguments: -- Values of type (U)Int8/16/32/64/128/256. +- Values or string representations of type (U)Int8/16/32/64/128/256. - Values of type Float32/64. -- String representations of (U)Int8/16/32/128/256. Arguments for which the default value is returned: - String representations of Float32/64 values, including `NaN` and `Inf` @@ -3555,7 +3533,7 @@ toDecimal32(expr, S) **Arguments** - `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). -- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). Supported arguments: - Values or string representations of type (U)Int8/16/32/64/128/256. @@ -3566,7 +3544,7 @@ Unsupported arguments: - String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32('0xc0fe', 1);`. :::note -Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. Excessive digits in a fraction are discarded (not rounded). Excessive digits in the integer part will lead to an exception. ::: @@ -3619,20 +3597,20 @@ toDecimal32OrZero(expr, S) **Arguments** - `expr` — A String representation of a number. [String](../data-types/string.md). -- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). Supported arguments: - String representations of type (U)Int8/16/32/64/128/256. - String representations of type Float32/64. Unsupported arguments: -- String representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of Float32/64 values `NaN` and `Inf`. - String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrZero('0xc0fe', 1);`. :::note -Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. Excessive digits in a fraction are discarded (not rounded). -Excessive digits in the integer part will lead to an exception. +Excessive digits in the integer part will lead to an error. ::: **Returned value** @@ -3645,8 +3623,10 @@ Query: ``` sql SELECT - toDecimal32OrZero(toString(-1.111), 5) AS val, - toTypeName(val) + toDecimal32OrZero(toString(-1.111), 5) AS a, + toTypeName(a), + toDecimal32OrZero(toString('Inf'), 5) as b, + toTypeName(b) FORMAT Vertical; ``` @@ -3655,26 +3635,10 @@ Result: ```response Row 1: ────── -val: -1.111 -toTypeName(val): Decimal(9, 5) -``` - -Query: - -``` sql -SELECT - toDecimal32OrZero(toString(-1.111), 2) AS val, - toTypeName(val) -FORMAT Vertical; -``` - -Result: - -```response -Row 1: -────── -val: -1.11 -toTypeName(val): Decimal(9, 2) +a: -1.111 +toTypeName(a): Decimal(9, 5) +b: 0 +toTypeName(b): Decimal(9, 5) ``` **See also** @@ -3696,20 +3660,20 @@ toDecimal32OrNull(expr, S) **Arguments** - `expr` — A String representation of a number. [String](../data-types/string.md). -- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). Supported arguments: - String representations of type (U)Int8/16/32/64/128/256. - String representations of type Float32/64. Unsupported arguments: -- String representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of Float32/64 values `NaN` and `Inf`. - String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrNull('0xc0fe', 1);`. :::note -Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. Excessive digits in a fraction are discarded (not rounded). -Excessive digits in the integer part will lead to an exception. +Excessive digits in the integer part will lead to an error. ::: **Returned value** @@ -3722,8 +3686,10 @@ Query: ``` sql SELECT - toDecimal32OrNull(toString(-1.111), 5) AS val, - toTypeName(val) + toDecimal32OrNull(toString(-1.111), 5) AS a, + toTypeName(a), + toDecimal32OrNull(toString('Inf'), 5) as b, + toTypeName(b) FORMAT Vertical; ``` @@ -3732,26 +3698,10 @@ Result: ```response Row 1: ────── -val: -1.111 -toTypeName(val): Nullable(Decimal(9, 5)) -``` - -Query: - -``` sql -SELECT - toDecimal32OrNull(toString(-1.111), 2) AS val, - toTypeName(val) -FORMAT Vertical; -``` - -Result: - -```response -Row 1: -────── -val: -1.11 -toTypeName(val): Nullable(Decimal(9, 2)) +a: -1.111 +toTypeName(a): Nullable(Decimal(9, 5)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(9, 5)) ``` **See also** @@ -3773,21 +3723,21 @@ toDecimal32OrDefault(expr, S[, default]) **Arguments** - `expr` — A String representation of a number. [String](../data-types/string.md). -- `S` — Scale parameter from [ 1 : 9 ] specifying how many decimal digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). -- `default` (optional) — The default value to return if parsing to type `Decimal32(S)` is unsuccessful. [Decimal32(S](../data-types/decimal.md). +- `S` — Scale parameter between 0 and 9, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal32(S)` is unsuccessful. [Decimal32(S)](../data-types/decimal.md). Supported arguments: - String representations of type (U)Int8/16/32/64/128/256. - String representations of type Float32/64. Unsupported arguments: -- String representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of Float32/64 values `NaN` and `Inf`. - String representations of binary and hexadecimal values, e.g. `SELECT toDecimal32OrDefault('0xc0fe', 1);`. :::note -Integer overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal32`: `( -1 * 10^(9 - S), 1 * 10^(9 - S) )`. Excessive digits in a fraction are discarded (not rounded). -Excessive digits in the integer part will lead to an exception. +Excessive digits in the integer part will lead to an error. ::: **Returned value** @@ -3800,8 +3750,10 @@ Query: ``` sql SELECT - toDecimal32OrDefault(toString(-1.111), 5) AS val, - toTypeName(val) + toDecimal32OrDefault(toString(0.0001), 5) AS a, + toTypeName(a), + toDecimal32OrDefault('Inf', 0, CAST('-1', 'Decimal32(0)')) AS b, + toTypeName(b) FORMAT Vertical; ``` @@ -3810,16 +3762,125 @@ Result: ```response Row 1: ────── -val: -1.111 -toTypeName(val): Decimal(9, 5) +a: 0.0001 +toTypeName(a): Decimal(9, 5) +b: -1 +toTypeName(b): Decimal(9, 0) ``` +**See also** + +- [`toDecimal32`](#todecimal32). +- [`toDecimal32OrZero`](#todecimal32orzero). +- [`toDecimal32OrNull`](#todecimal32ornull). + +## toDecimal64 + +Converts an input value to a value of type [`Decimal(18, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. + +**Syntax** + +```sql +toDecimal64(expr, S) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(18, S)`. [Decimal64(S)](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toDecimal64(2, 1) AS a, toTypeName(a) AS type_a, + toDecimal64(4.2, 2) AS b, toTypeName(b) AS type_b, + toDecimal64('4.2', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 2 +type_a: Decimal(18, 1) +b: 4.2 +type_b: Decimal(18, 2) +c: 4.2 +type_c: Decimal(18, 3) +``` + +**See also** + +- [`toDecimal64OrZero`](#todecimal64orzero). +- [`toDecimal64OrNull`](#todecimal64ornull). +- [`toDecimal64OrDefault`](#todecimal64ordefault). + +## toDecimal64OrZero + +Like [`toDecimal64`](#todecimal64), this function converts an input value to a value of type [Decimal(18, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal64OrZero(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(18, S)` if successful, otherwise `0` with `S` decimal places. [Decimal64(S)](../data-types/decimal.md). + +**Example** + Query: ``` sql SELECT - toDecimal32OrDefault(toString(-1.111), 2) AS val, - toTypeName(val) + toDecimal64OrZero(toString(0.0001), 18) AS a, + toTypeName(a), + toDecimal64OrZero(toString('Inf'), 18) as b, + toTypeName(b) FORMAT Vertical; ``` @@ -3828,16 +3889,61 @@ Result: ```response Row 1: ────── -val: -1.11 -toTypeName(val): Decimal(9, 2) +a: 0.0001 +toTypeName(a): Decimal(18, 18) +b: 0 +toTypeName(b): Decimal(18, 18) ``` +**See also** + +- [`toDecimal64`](#todecimal64). +- [`toDecimal64OrNull`](#todecimal64ornull). +- [`toDecimal64OrDefault`](#todecimal64ordefault). + +## toDecimal64OrNull + +Like [`toDecimal64`](#todecimal64), this function converts an input value to a value of type [Nullable(Decimal(18, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal64OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(18, S))` if successful, otherwise value `NULL` of the same type. [Decimal64(S)](../data-types/decimal.md). + +**Examples** + Query: ``` sql SELECT - toDecimal32OrDefault('Inf', 2, CAST('0', 'Decimal32(2)')) AS val, - toTypeName(val) + toDecimal64OrNull(toString(0.0001), 18) AS a, + toTypeName(a), + toDecimal64OrNull(toString('Inf'), 18) as b, + toTypeName(b) FORMAT Vertical; ``` @@ -3846,10 +3952,590 @@ Result: ```response Row 1: ────── -val: 0 -toTypeName(val): Decimal(9, 2) +a: 0.0001 +toTypeName(a): Nullable(Decimal(18, 18)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(18, 18)) ``` +**See also** + +- [`toDecimal64`](#todecimal64). +- [`toDecimal64OrZero`](#todecimal64orzero). +- [`toDecimal64OrDefault`](#todecimal64ordefault). + +## toDecimal64OrDefault + +Like [`toDecimal64`](#todecimal64), this function converts an input value to a value of type [Decimal(18, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal64OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 18, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal64(S)` is unsuccessful. [Decimal64(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal64OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal64`: `( -1 * 10^(18 - S), 1 * 10^(18 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(18, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal64(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal64OrDefault(toString(0.0001), 18) AS a, + toTypeName(a), + toDecimal64OrDefault('Inf', 0, CAST('-1', 'Decimal64(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(18, 18) +b: -1 +toTypeName(b): Decimal(18, 0) +``` + +**See also** + +- [`toDecimal64`](#todecimal64). +- [`toDecimal64OrZero`](#todecimal64orzero). +- [`toDecimal64OrNull`](#todecimal64ornull). + +## toDecimal128 + +Converts an input value to a value of type [`Decimal(38, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. + +**Syntax** + +```sql +toDecimal128(expr, S) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(38, S)`. [Decimal128(S)](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toDecimal128(99, 1) AS a, toTypeName(a) AS type_a, + toDecimal128(99.67, 2) AS b, toTypeName(b) AS type_b, + toDecimal128('99.67', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 99 +type_a: Decimal(38, 1) +b: 99.67 +type_b: Decimal(38, 2) +c: 99.67 +type_c: Decimal(38, 3) +``` + +**See also** + +- [`toDecimal128OrZero`](#todecimal128orzero). +- [`toDecimal128OrNull`](#todecimal128ornull). +- [`toDecimal128OrDefault`](#todecimal128ordefault). + +## toDecimal128OrZero + +Like [`toDecimal128`](#todecimal128), this function converts an input value to a value of type [Decimal(38, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal128OrZero(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(38, S)` if successful, otherwise `0` with `S` decimal places. [Decimal128(S)](../data-types/decimal.md). + +**Example** + +Query: + +``` sql +SELECT + toDecimal128OrZero(toString(0.0001), 38) AS a, + toTypeName(a), + toDecimal128OrZero(toString('Inf'), 38) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(38, 38) +b: 0 +toTypeName(b): Decimal(38, 38) +``` + +**See also** + +- [`toDecimal128`](#todecimal128). +- [`toDecimal128OrNull`](#todecimal128ornull). +- [`toDecimal128OrDefault`](#todecimal128ordefault). + +## toDecimal128OrNull + +Like [`toDecimal128`](#todecimal128), this function converts an input value to a value of type [Nullable(Decimal(38, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal128OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(38, S))` if successful, otherwise value `NULL` of the same type. [Decimal128(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal128OrNull(toString(1/42), 38) AS a, + toTypeName(a), + toDecimal128OrNull(toString('Inf'), 38) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Nullable(Decimal(38, 38)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(38, 38)) +``` + +**See also** + +- [`toDecimal128`](#todecimal128). +- [`toDecimal128OrZero`](#todecimal128orzero). +- [`toDecimal128OrDefault`](#todecimal128ordefault). + +## toDecimal128OrDefault + +Like [`toDecimal128`](#todecimal128), this function converts an input value to a value of type [Decimal(38, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal128OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 38, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal128(S)` is unsuccessful. [Decimal128(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal128OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal128`: `( -1 * 10^(38 - S), 1 * 10^(38 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(38, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal128(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal128OrDefault(toString(1/42), 18) AS a, + toTypeName(a), + toDecimal128OrDefault('Inf', 0, CAST('-1', 'Decimal128(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Decimal(38, 18) +b: -1 +toTypeName(b): Decimal(38, 0) +``` + +**See also** + +- [`toDecimal128`](#todecimal128). +- [`toDecimal128OrZero`](#todecimal128orzero). +- [`toDecimal128OrNull`](#todecimal128ornull). + +## toDecimal256 + +Converts an input value to a value of type [`Decimal(76, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. + +**Syntax** + +```sql +toDecimal256(expr, S) +``` + +**Arguments** + +- `expr` — Expression returning a number or a string representation of a number. [Expression](../syntax.md/#syntax-expressions). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- Values or string representations of type (U)Int8/16/32/64/128/256. +- Values or string representations of type Float32/64. + +Unsupported arguments: +- Values or string representations of Float32/64 values `NaN` and `Inf` (case-insensitive). +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an exception. +::: + +**Returned value** + +- Value of type `Decimal(76, S)`. [Decimal256(S)](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toDecimal256(99, 1) AS a, toTypeName(a) AS type_a, + toDecimal256(99.67, 2) AS b, toTypeName(b) AS type_b, + toDecimal256('99.67', 3) AS c, toTypeName(c) AS type_c +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 99 +type_a: Decimal(76, 1) +b: 99.67 +type_b: Decimal(76, 2) +c: 99.67 +type_c: Decimal(76, 3) +``` + +**See also** + +- [`toDecimal256OrZero`](#todecimal256orzero). +- [`toDecimal256OrNull`](#todecimal256ornull). +- [`toDecimal256OrDefault`](#todecimal256ordefault). + +## toDecimal256OrZero + +Like [`toDecimal256`](#todecimal256), this function converts an input value to a value of type [Decimal(76, S)](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal256OrZero(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256OrZero('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(76, S)` if successful, otherwise `0` with `S` decimal places. [Decimal256(S)](../data-types/decimal.md). + +**Example** + +Query: + +``` sql +SELECT + toDecimal256OrZero(toString(0.0001), 76) AS a, + toTypeName(a), + toDecimal256OrZero(toString('Inf'), 76) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.0001 +toTypeName(a): Decimal(76, 76) +b: 0 +toTypeName(b): Decimal(76, 76) +``` + +**See also** + +- [`toDecimal256`](#todecimal256). +- [`toDecimal256OrNull`](#todecimal256ornull). +- [`toDecimal256OrDefault`](#todecimal256ordefault). + +## toDecimal256OrNull + +Like [`toDecimal256`](#todecimal256), this function converts an input value to a value of type [Nullable(Decimal(76, S))](../data-types/decimal.md) but returns `0` in case of an error. + +**Syntax** + +```sql +toDecimal256OrNull(expr, S) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256OrNull('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Nullable(Decimal(76, S))` if successful, otherwise value `NULL` of the same type. [Decimal256(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal256OrNull(toString(1/42), 76) AS a, + toTypeName(a), + toDecimal256OrNull(toString('Inf'), 76) as b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Nullable(Decimal(76, 76)) +b: ᴺᵁᴸᴸ +toTypeName(b): Nullable(Decimal(76, 76)) +``` + +**See also** + +- [`toDecimal256`](#todecimal256). +- [`toDecimal256OrZero`](#todecimal256orzero). +- [`toDecimal256OrDefault`](#todecimal256ordefault). + +## toDecimal256OrDefault + +Like [`toDecimal256`](#todecimal256), this function converts an input value to a value of type [Decimal(76, S)](../data-types/decimal.md) but returns the default value in case of an error. + +**Syntax** + +```sql +toDecimal256OrDefault(expr, S[, default]) +``` + +**Arguments** + +- `expr` — A String representation of a number. [String](../data-types/string.md). +- `S` — Scale parameter between 0 and 76, specifying how many digits the fractional part of a number can have. [UInt8](../data-types/int-uint.md). +- `default` (optional) — The default value to return if parsing to type `Decimal256(S)` is unsuccessful. [Decimal256(S)](../data-types/decimal.md). + +Supported arguments: +- String representations of type (U)Int8/16/32/64/128/256. +- String representations of type Float32/64. + +Unsupported arguments: +- String representations of Float32/64 values `NaN` and `Inf`. +- String representations of binary and hexadecimal values, e.g. `SELECT toDecimal256OrDefault('0xc0fe', 1);`. + +:::note +An overflow can occur if the value of `expr` exceeds the bounds of `Decimal256`: `( -1 * 10^(76 - S), 1 * 10^(76 - S) )`. +Excessive digits in a fraction are discarded (not rounded). +Excessive digits in the integer part will lead to an error. +::: + +**Returned value** + +- Value of type `Decimal(76, S)` if successful, otherwise returns the default value if passed or `0` if not. [Decimal256(S)](../data-types/decimal.md). + +**Examples** + +Query: + +``` sql +SELECT + toDecimal256OrDefault(toString(1/42), 76) AS a, + toTypeName(a), + toDecimal256OrDefault('Inf', 0, CAST('-1', 'Decimal256(0)')) AS b, + toTypeName(b) +FORMAT Vertical; +``` + +Result: + +```response +Row 1: +────── +a: 0.023809523809523808 +toTypeName(a): Decimal(76, 76) +b: -1 +toTypeName(b): Decimal(76, 0) +``` + +**See also** + +- [`toDecimal256`](#todecimal256). +- [`toDecimal256OrZero`](#todecimal256orzero). +- [`toDecimal256OrNull`](#todecimal256ornull). + ## toString Functions for converting between numbers, strings (but not fixed strings), dates, and dates with times. From c1b5b908ba13f863de4e9621d3fe2c8139758650 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 10 Aug 2024 13:01:55 +0000 Subject: [PATCH 100/132] hide Field::get in private, only use Field::safeGet --- programs/client/Client.cpp | 2 +- programs/keeper-client/Commands.cpp | 6 +- .../AggregateFunctionGroupArray.cpp | 12 +- .../AggregateFunctionGroupArrayIntersect.cpp | 6 +- .../AggregateFunctionGroupArrayMoving.cpp | 6 +- .../AggregateFunctionGroupArraySorted.cpp | 6 +- .../AggregateFunctionGroupConcat.cpp | 10 +- .../AggregateFunctionGroupUniqArray.cpp | 6 +- ...AggregateFunctionKolmogorovSmirnovTest.cpp | 4 +- ...ateFunctionLargestTriangleThreeBuckets.cpp | 2 +- .../AggregateFunctionMannWhitney.cpp | 4 +- .../AggregateFunctionQuantile.h | 6 +- .../AggregateFunctionSumMap.cpp | 10 +- .../Passes/ComparisonTupleEliminationPass.cpp | 2 +- .../Passes/ConvertOrLikeChainPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 6 +- src/Analyzer/Passes/FuseFunctionsPass.cpp | 2 +- .../Passes/IfTransformStringsToEnumPass.cpp | 10 +- .../Passes/NormalizeCountVariantsPass.cpp | 2 +- ...ateOrDateTimeConverterWithPreimagePass.cpp | 8 +- .../RewriteAggregateFunctionWithIfPass.cpp | 4 +- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 +- src/Analyzer/QueryTreeBuilder.cpp | 2 +- src/Analyzer/Resolve/IdentifierResolver.cpp | 8 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 4 +- src/Analyzer/SetUtils.cpp | 8 +- src/Backups/BackupSettings.cpp | 4 +- src/Backups/DDLAdjustingForBackupVisitor.cpp | 4 +- src/Backups/RestoreSettings.cpp | 8 +- src/Backups/SettingsFieldOptionalString.cpp | 2 +- src/Backups/SettingsFieldOptionalUUID.cpp | 2 +- src/Client/QueryFuzzer.cpp | 20 +-- src/Client/Suggest.cpp | 2 +- src/Columns/ColumnAggregateFunction.cpp | 19 +-- src/Columns/ColumnArray.cpp | 6 +- src/Columns/ColumnDecimal.h | 2 +- src/Columns/ColumnFixedString.cpp | 4 +- src/Columns/ColumnMap.cpp | 10 +- src/Columns/ColumnObject.cpp | 4 +- src/Columns/ColumnString.h | 2 +- src/Columns/ColumnTuple.cpp | 6 +- src/Columns/ColumnVector.h | 4 +- src/Columns/tests/gtest_column_variant.cpp | 120 +++++++++--------- src/Columns/tests/gtest_low_cardinality.cpp | 6 +- src/Common/CollectionOfDerived.h | 2 +- src/Common/FieldBinaryEncoding.cpp | 2 +- src/Common/FieldVisitorSum.cpp | 2 +- src/Common/FieldVisitorSum.h | 2 +- src/Common/FieldVisitorToString.cpp | 2 +- src/Common/HashTable/HashMap.h | 2 +- src/Common/examples/arena_with_free_lists.cpp | 22 ++-- src/Core/ExternalTable.cpp | 2 +- src/Core/Field.h | 71 ++++------- src/Core/Range.cpp | 16 +-- src/Core/Settings.cpp | 2 +- src/Core/SettingsFields.cpp | 14 +- src/Core/SettingsQuirks.cpp | 6 +- src/Core/examples/field.cpp | 2 +- src/Core/tests/gtest_field.cpp | 28 ++-- src/DataTypes/DataTypeAggregateFunction.cpp | 4 +- src/DataTypes/DataTypeDynamic.cpp | 4 +- src/DataTypes/DataTypeEnum.cpp | 18 +-- src/DataTypes/DataTypeFixedString.cpp | 4 +- src/DataTypes/DataTypeObject.cpp | 2 +- src/DataTypes/DataTypesDecimal.cpp | 6 +- src/DataTypes/ObjectUtils.cpp | 2 +- .../Serializations/JSONDataParser.cpp | 4 +- .../SerializationAggregateFunction.cpp | 4 +- .../Serializations/SerializationArray.cpp | 4 +- .../SerializationDecimalBase.cpp | 2 +- .../SerializationFixedString.cpp | 4 +- .../SerializationIPv4andIPv6.cpp | 2 +- .../Serializations/SerializationMap.cpp | 4 +- .../Serializations/SerializationNumber.cpp | 2 +- .../Serializations/SerializationString.cpp | 4 +- .../Serializations/SerializationTuple.cpp | 4 +- .../Serializations/SerializationUUID.cpp | 2 +- src/DataTypes/registerDataTypeDateTime.cpp | 2 +- src/Databases/DDLLoadingDependencyVisitor.cpp | 4 +- src/Databases/DDLRenamingVisitor.cpp | 6 +- src/Databases/DatabaseReplicated.cpp | 4 +- .../MySQL/MaterializedMySQLSyncThread.cpp | 18 +-- .../MySQL/tests/gtest_mysql_binlog.cpp | 30 ++--- src/Dictionaries/CacheDictionaryStorage.h | 12 +- src/Dictionaries/DictionaryHelpers.h | 4 +- src/Dictionaries/FlatDictionary.cpp | 6 +- src/Dictionaries/HashedArrayDictionary.cpp | 8 +- src/Dictionaries/HashedDictionary.h | 8 +- .../HierarchyDictionariesUtils.cpp | 6 +- src/Dictionaries/IPAddressDictionary.cpp | 8 +- src/Dictionaries/MongoDBDictionarySource.cpp | 3 +- src/Dictionaries/PolygonDictionary.cpp | 10 +- src/Dictionaries/RangeHashedDictionary.h | 4 +- src/Dictionaries/RedisDictionarySource.cpp | 3 +- src/Disks/getOrCreateDiskFromAST.cpp | 2 +- src/Functions/DateTimeTransforms.h | 4 +- src/Functions/FunctionsConsistentHashing.h | 4 +- src/Functions/FunctionsConversion.cpp | 30 ++--- src/Functions/FunctionsJSON.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 6 +- src/Functions/FunctionsRound.h | 6 +- src/Functions/IFunctionCustomWeek.h | 8 +- src/Functions/IFunctionDateOrDateTime.h | 16 +-- .../JSONPath/Parsers/ParserJSONPathRange.cpp | 4 +- src/Functions/MultiMatchAllIndicesImpl.h | 2 +- src/Functions/MultiMatchAnyImpl.h | 2 +- src/Functions/MultiSearchAllPositionsImpl.h | 2 +- src/Functions/MultiSearchFirstIndexImpl.h | 2 +- src/Functions/MultiSearchFirstPositionImpl.h | 2 +- src/Functions/MultiSearchImpl.h | 2 +- src/Functions/URL/cutURLParameter.cpp | 2 +- src/Functions/array/arrayElement.cpp | 38 +++--- src/Functions/array/mapOp.cpp | 2 +- src/Functions/getClientHTTPHeader.cpp | 2 +- src/Functions/multiIf.cpp | 2 +- src/Functions/nested.cpp | 4 +- src/IO/S3Common.cpp | 8 +- src/Interpreters/ActionsVisitor.cpp | 12 +- src/Interpreters/AddDefaultDatabaseVisitor.h | 2 +- .../ComparisonTupleEliminationVisitor.cpp | 2 +- .../ConvertFunctionOrLikeVisitor.cpp | 4 +- .../ConvertStringsToEnumVisitor.cpp | 12 +- src/Interpreters/DDLTask.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 4 +- src/Interpreters/InterpreterExplainQuery.cpp | 4 +- .../InterpreterKillQueryQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/MutationsInterpreter.cpp | 2 +- ...OrDateTimeConverterWithPreimageVisitor.cpp | 8 +- ...OptimizeIfWithConstantConditionVisitor.cpp | 4 +- .../OptimizeShardingKeyRewriteInVisitor.cpp | 21 ++- .../RewriteCountVariantsVisitor.cpp | 2 +- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Interpreters/WindowDescription.cpp | 8 +- src/Interpreters/convertFieldToType.cpp | 62 ++++----- .../evaluateConstantExpression.cpp | 4 +- .../replaceForPositionalArguments.cpp | 4 +- .../tests/gtest_comparison_graph.cpp | 4 +- src/Parsers/ASTFunction.cpp | 2 +- src/Parsers/ASTLiteral.cpp | 8 +- src/Parsers/Access/ParserCreateQuotaQuery.cpp | 2 +- src/Parsers/ExpressionElementParsers.cpp | 10 +- src/Parsers/ParserAlterQuery.cpp | 12 +- src/Parsers/ParserCheckQuery.cpp | 2 +- src/Parsers/ParserCreateQuery.cpp | 4 +- src/Parsers/ParserDictionary.cpp | 6 +- src/Parsers/ParserPartition.cpp | 2 +- src/Parsers/ParserSystemQuery.cpp | 8 +- src/Parsers/ParserUndropQuery.cpp | 2 +- src/Parsers/tests/gtest_dictionary_parser.cpp | 40 +++--- .../Formats/Impl/CHColumnToArrowColumn.cpp | 2 +- .../Impl/ConstantExpressionTemplate.cpp | 6 +- .../Impl/NativeORCBlockInputFormat.cpp | 4 +- .../Impl/PrometheusTextOutputFormat.cpp | 4 +- .../Formats/Impl/ValuesBlockInputFormat.cpp | 8 +- .../Algorithms/SummingSortedAlgorithm.cpp | 12 +- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 10 +- src/Processors/Sources/MySQLSource.cpp | 4 +- .../Transforms/FillingTransform.cpp | 14 +- src/Processors/Transforms/WindowTransform.cpp | 20 +-- src/Storages/AlterCommands.cpp | 8 +- src/Storages/ColumnsDescription.cpp | 2 +- ...pproximateNearestNeighborIndexesCommon.cpp | 12 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 8 +- src/Storages/MergeTree/KeyCondition.cpp | 12 +- src/Storages/MergeTree/MergeTreeData.cpp | 8 +- .../MergeTree/MergeTreeDataWriter.cpp | 4 +- .../MergeTree/MergeTreeIndexAnnoy.cpp | 6 +- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 12 +- .../MergeTreeIndexBloomFilterText.cpp | 50 ++++---- .../MergeTree/MergeTreeIndexFullText.cpp | 42 +++--- .../MergeTree/MergeTreeIndexHypothesis.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 4 +- .../MergeTree/MergeTreeIndexUSearch.cpp | 8 +- src/Storages/MergeTree/MergeTreePartition.cpp | 2 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 6 +- .../MergeTree/registerStorageMergeTree.cpp | 2 +- src/Storages/Statistics/Statistics.cpp | 16 +-- .../Statistics/StatisticsCountMinSketch.cpp | 2 +- src/Storages/StorageFactory.cpp | 2 +- src/Storages/StorageFile.cpp | 6 +- src/Storages/StorageFuzzJSON.cpp | 2 +- src/Storages/StorageJoin.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/System/StorageSystemColumns.cpp | 6 +- .../System/StorageSystemPartsBase.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.h | 10 +- src/Storages/VirtualColumnUtils.h | 2 +- src/Storages/getStructureOfRemoteTable.cpp | 12 +- src/TableFunctions/TableFunctionExplain.cpp | 4 +- src/TableFunctions/TableFunctionFile.cpp | 2 +- .../TableFunctionMergeTreeIndex.cpp | 4 +- 192 files changed, 749 insertions(+), 774 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1d99d223ee9..25c94c56aa6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -223,7 +223,7 @@ std::vector Client::loadWarningMessages() size_t rows = packet.block.rows(); for (size_t i = 0; i < rows; ++i) - messages.emplace_back(column[i].get()); + messages.emplace_back(column[i].safeGet()); } continue; diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index df9da8e9613..7226bd82df7 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -95,7 +95,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co client->zookeeper->set( client->getAbsolutePath(query->args[0].safeGet()), query->args[1].safeGet(), - static_cast(query->args[2].get())); + static_cast(query->args[2].safeGet())); } bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -494,7 +494,7 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con { Int32 version{-1}; if (query->args.size() == 2) - version = static_cast(query->args[1].get()); + version = static_cast(query->args[1].safeGet()); client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet()), version); } @@ -549,7 +549,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient String leaving; String new_members; - auto operation = query->args[0].get(); + auto operation = query->args[0].safeGet(); switch (operation) { case static_cast(ReconfigCommand::Operation::ADD): diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 7034e6373b1..5cc9f725b46 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -780,12 +780,12 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); has_limit = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, @@ -816,11 +816,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[i].get() < 0) || - (type == Field::Types::UInt64 && parameters[i].get() == 0)) + if ((type == Field::Types::Int64 && parameters[i].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[i].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - return parameters[i].get(); + return parameters[i].safeGet(); }; UInt64 max_elems = get_parameter(0); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 38f2fcb9fb9..36d00b1d9ec 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -83,16 +83,16 @@ public: if (version == 1) { for (size_t i = 0; i < arr_size; ++i) - set.insert(static_cast((*data_column)[offset + i].get())); + set.insert(static_cast((*data_column)[offset + i].safeGet())); } else if (!set.empty()) { typename State::Set new_set; for (size_t i = 0; i < arr_size; ++i) { - typename State::Set::LookupResult set_value = set.find(static_cast((*data_column)[offset + i].get())); + typename State::Set::LookupResult set_value = set.find(static_cast((*data_column)[offset + i].safeGet())); if (set_value != nullptr) - new_set.insert(static_cast((*data_column)[offset + i].get())); + new_set.insert(static_cast((*data_column)[offset + i].safeGet())); } set = std::move(new_set); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index 026b8d1956f..2c3ac7f883e 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -269,12 +269,12 @@ AggregateFunctionPtr createAggregateFunctionMoving( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); - if ((type == Field::Types::Int64 && parameters[0].get() <= 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() <= 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name); limit_size = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp index d41d743e17a..27043ed6aa6 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp @@ -397,11 +397,11 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index 5494ef74705..636ac80e350 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -247,7 +247,7 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat( if (type != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name); - delimiter = parameters[0].get(); + delimiter = parameters[0].safeGet(); } if (parameters.size() == 2) { @@ -256,12 +256,12 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name); - if ((type == Field::Types::Int64 && parameters[1].get() <= 0) || - (type == Field::Types::UInt64 && parameters[1].get() == 0)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get()); + if ((type == Field::Types::Int64 && parameters[1].safeGet() <= 0) || + (type == Field::Types::UInt64 && parameters[1].safeGet() == 0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].safeGet()); has_limit = true; - limit = parameters[1].get(); + limit = parameters[1].safeGet(); } if (has_limit) diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp index 7b4300b3568..5cbf449c946 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.cpp @@ -323,12 +323,12 @@ AggregateFunctionPtr createAggregateFunctionGroupUniqArray( if (type != Field::Types::Int64 && type != Field::Types::UInt64) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - if ((type == Field::Types::Int64 && parameters[0].get() < 0) || - (type == Field::Types::UInt64 && parameters[0].get() == 0)) + if ((type == Field::Types::Int64 && parameters[0].safeGet() < 0) || + (type == Field::Types::UInt64 && parameters[0].safeGet() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); limit_size = true; - max_elems = parameters[0].get(); + max_elems = parameters[0].safeGet(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp index 04eebe9f485..28e8d37b8c8 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp @@ -238,7 +238,7 @@ public: if (params[0].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName()); - const auto & param = params[0].get(); + const auto & param = params[0].safeGet(); if (param == "two-sided") alternative = Alternative::TwoSided; else if (param == "less") @@ -255,7 +255,7 @@ public: if (params[1].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a String", getName()); - method = params[1].get(); + method = params[1].safeGet(); if (method != "auto" && method != "exact" && method != "asymp" && method != "asymptotic") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown method in aggregate function {}. " "It must be one of: 'auto', 'exact', 'asymp' (or 'asymptotic')", getName()); diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp index 6d1e3c0f64b..813b13b6f7b 100644 --- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -181,7 +181,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a UInt64", getName()); - total_buckets = params[0].get(); + total_buckets = params[0].safeGet(); this->x_type = WhichDataType(arguments[0]).idx; this->y_type = WhichDataType(arguments[1]).idx; diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index f088737c340..fa90846650d 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -152,7 +152,7 @@ public: if (params[0].getType() != Field::Types::String) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter to be a String", getName()); - const auto & param = params[0].get(); + const auto & param = params[0].safeGet(); if (param == "two-sided") alternative = Alternative::TwoSided; else if (param == "less") @@ -169,7 +169,7 @@ public: if (params[1].getType() != Field::Types::UInt64) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require second parameter to be a UInt64", getName()); - continuity_correction = static_cast(params[1].get()); + continuity_correction = static_cast(params[1].safeGet()); } String getName() const override diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 127dc06b642..423fd4bc569 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -117,7 +117,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName()); - relative_accuracy = relative_accuracy_field.get(); + relative_accuracy = relative_accuracy_field.safeGet(); if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy)) throw Exception( @@ -147,9 +147,9 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName()); if (accuracy_field.getType() == Field::Types::Int64) - accuracy = accuracy_field.get(); + accuracy = accuracy_field.safeGet(); else - accuracy = accuracy_field.get(); + accuracy = accuracy_field.safeGet(); if (accuracy <= 0) throw Exception( diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 666136a91b7..9a94c3dfe1a 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -300,12 +300,12 @@ public: /// Compatibility with previous versions. if (value.getType() == Field::Types::Decimal32) { - auto source = value.get>(); + auto source = value.safeGet>(); value = DecimalField(source.getValue(), source.getScale()); } else if (value.getType() == Field::Types::Decimal64) { - auto source = value.get>(); + auto source = value.safeGet>(); value = DecimalField(source.getValue(), source.getScale()); } @@ -355,7 +355,7 @@ public: /// Compatibility with previous versions. if (value.getType() == Field::Types::Decimal128) { - auto source = value.get>(); + auto source = value.safeGet>(); WhichDataType value_type(values_types[col_idx]); if (value_type.isDecimal32()) { @@ -560,7 +560,7 @@ private: template bool compareImpl(FieldType & x) const { - auto val = rhs.get(); + auto val = rhs.safeGet(); if (val > x) { x = val; @@ -600,7 +600,7 @@ private: template bool compareImpl(FieldType & x) const { - auto val = rhs.get(); + auto val = rhs.safeGet(); if (val < x) { x = val; diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index 180470952cd..4bc4255e2e2 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -129,7 +129,7 @@ private: if (constant_node_value.getType() != Field::Types::Which::Tuple) return {}; - const auto & constant_tuple = constant_node_value.get(); + const auto & constant_tuple = constant_node_value.safeGet(); const auto & function_arguments_nodes = function_node_typed.getArguments().getNodes(); size_t function_arguments_nodes_size = function_arguments_nodes.size(); diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 2b2ac95d7b9..6c4ce789993 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -89,7 +89,7 @@ public: if (!pattern || !isString(pattern->getResultType())) continue; - auto regexp = likePatternToRegexp(pattern->getValue().get()); + auto regexp = likePatternToRegexp(pattern->getValue().safeGet()); /// Case insensitive. Works with UTF-8 as well. if (is_ilike) regexp = "(?i)" + regexp; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index b968f43c6a6..1fc3eec6833 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -68,10 +68,10 @@ void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, Col String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple) { if (value.getType() == Field::Types::String) - return value.get(); + return value.safeGet(); if (value.getType() == Field::Types::UInt64) - return data_type_tuple.getNameByPosition(value.get()); + return data_type_tuple.getNameByPosition(value.safeGet()); return ""; } @@ -79,7 +79,7 @@ String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & dat String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &) { if (value.getType() == Field::Types::String) - return value.get(); + return value.safeGet(); return ""; } diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 0175e304a2b..f3b109a10ed 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -187,7 +187,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector & nodes /// Sort nodes and parameters in ascending order of quantile level std::vector permutation(nodes.size()); iota(permutation.data(), permutation.size(), size_t(0)); - std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get() < parameters[j].get(); }); + std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].safeGet() < parameters[j].safeGet(); }); std::vector new_nodes; new_nodes.reserve(permutation.size()); diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index d966f129d08..f81327c5d55 100644 --- a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -134,8 +134,8 @@ public: return; std::set string_values; - string_values.insert(first_literal->getValue().get()); - string_values.insert(second_literal->getValue().get()); + string_values.insert(first_literal->getValue().safeGet()); + string_values.insert(second_literal->getValue().safeGet()); changeIfArguments(*function_if_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_if_node), context); @@ -163,7 +163,7 @@ public: if (!isArray(literal_to->getResultType()) || !isString(literal_default->getResultType())) return; - auto array_to = literal_to->getValue().get(); + auto array_to = literal_to->getValue().safeGet(); if (array_to.empty()) return; @@ -178,9 +178,9 @@ public: std::set string_values; for (const auto & value : array_to) - string_values.insert(value.get()); + string_values.insert(value.safeGet()); - string_values.insert(literal_default->getValue().get()); + string_values.insert(literal_default->getValue().safeGet()); changeTransformArguments(*function_modified_transform_node, string_values, context); wrapIntoToString(*function_node, std::move(modified_transform_node), context); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 3a8b6e75d40..02f1c93ea7f 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -54,7 +54,7 @@ public: } else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 && - first_argument_constant_literal.get() == 1) + first_argument_constant_literal.safeGet() == 1) { function_node->getArguments().getNodes().clear(); resolveAggregateFunctionNodeByName(*function_node, "count"); diff --git a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp index feb8bcc792d..0f33c302265 100644 --- a/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp +++ b/src/Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.cpp @@ -143,13 +143,13 @@ private: const auto & column_type = column_node_typed.getColumnType().get(); if (isDateOrDate32(column_type)) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time = date_lut.dateToString(range.first.safeGet()); + end_date_or_date_time = date_lut.dateToString(range.second.safeGet()); } else if (isDateTime(column_type) || isDateTime64(column_type)) { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); + start_date_or_date_time = date_lut.timeToString(range.first.safeGet()); + end_date_or_date_time = date_lut.timeToString(range.second.safeGet()); } else [[unlikely]] return {}; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index a48e88132a6..091061ceb81 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -60,7 +60,7 @@ public: { const auto & second_const_value = second_const_node->getValue(); if (second_const_value.isNull() - || (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.get() == 0 + || (lower_name == "sum" && isInt64OrUInt64FieldType(second_const_value.getType()) && second_const_value.safeGet() == 0 && !if_node->getResultType()->isNullable())) { /// avg(if(cond, a, null)) -> avgIf(a::ResultTypeIf, cond) @@ -89,7 +89,7 @@ public: { const auto & first_const_value = first_const_node->getValue(); if (first_const_value.isNull() - || (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.get() == 0 + || (lower_name == "sum" && isInt64OrUInt64FieldType(first_const_value.getType()) && first_const_value.safeGet() == 0 && !if_node->getResultType()->isNullable())) { /// avg(if(cond, null, a) -> avgIf(a::ResultTypeIf, !cond)) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1524629dc81..a987ced497a 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -66,7 +66,7 @@ public: resolveAggregateFunctionNodeByName(*function_node, "countIf"); - if (constant_value_literal.get() != 1) + if (constant_value_literal.safeGet() != 1) { /// Rewrite `sumIf(123, cond)` into `123 * countIf(cond)` node = getMultiplyFunction(std::move(multiplier_node), node); @@ -105,8 +105,8 @@ public: const auto & if_true_condition_constant_value_literal = if_true_condition_constant_node->getValue(); const auto & if_false_condition_constant_value_literal = if_false_condition_constant_node->getValue(); - auto if_true_condition_value = if_true_condition_constant_value_literal.get(); - auto if_false_condition_value = if_false_condition_constant_value_literal.get(); + auto if_true_condition_value = if_true_condition_constant_value_literal.safeGet(); + auto if_false_condition_value = if_false_condition_constant_value_literal.safeGet(); if (if_false_condition_value == 0) { diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index fb41826929f..9754897d54d 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -471,7 +471,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express std::shared_ptr collator; if (order_by_element.getCollation()) - collator = std::make_shared(order_by_element.getCollation()->as().value.get()); + collator = std::make_shared(order_by_element.getCollation()->as().value.safeGet()); const auto & sort_expression_ast = order_by_element.children.at(0); auto sort_expression = buildExpression(sort_expression_ast, context); diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 447bf825836..a79433ac130 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1273,7 +1273,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns( const auto & constant_node_value = constant_node.getValue(); if (constant_node_value.getType() == Field::Types::String) { - array_join_subcolumn_prefix = constant_node_value.get() + "."; + array_join_subcolumn_prefix = constant_node_value.safeGet() + "."; array_join_parent_column = argument_nodes.at(0).get(); } } @@ -1287,7 +1287,7 @@ QueryTreeNodePtr IdentifierResolver::matchArrayJoinSubcolumns( if (!second_argument || second_argument->getValue().getType() != Field::Types::String) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected constant string as second argument of getSubcolumn function {}", resolved_function->dumpTree()); - const auto & resolved_subcolumn_path = second_argument->getValue().get(); + const auto & resolved_subcolumn_path = second_argument->getValue().safeGet(); if (!startsWith(resolved_subcolumn_path, array_join_subcolumn_prefix)) return {}; @@ -1331,7 +1331,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression size_t nested_function_arguments_size = nested_function_arguments.size(); const auto & nested_keys_names_constant_node = nested_function_arguments[0]->as(); - const auto & nested_keys_names = nested_keys_names_constant_node.getValue().get(); + const auto & nested_keys_names = nested_keys_names_constant_node.getValue().safeGet(); size_t nested_keys_names_size = nested_keys_names.size(); if (nested_keys_names_size == nested_function_arguments_size - 1) @@ -1344,7 +1344,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveExpressionFromArrayJoinExpression auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); - const auto & nested_key_name = nested_keys_names[i - 1].get(); + const auto & nested_key_name = nested_keys_names[i - 1].safeGet(); Identifier nested_identifier = Identifier(nested_key_name); array_join_resolved_expression = wrapExpressionNodeInTupleElement(array_join_column, nested_identifier, scope.context); break; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 767d5c11075..2ce79b7bddd 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -746,11 +746,11 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ UInt64 pos; if (constant_node->getValue().getType() == Field::Types::UInt64) { - pos = constant_node->getValue().get(); + pos = constant_node->getValue().safeGet(); } else // Int64 { - auto value = constant_node->getValue().get(); + auto value = constant_node->getValue().safeGet(); if (value > 0) pos = value; else diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index 0ecb3545225..095be1fb9b8 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -93,7 +93,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& "Invalid type in set. Expected tuple, got {}", value.getTypeName()); - const auto & tuple = value.template get(); + const auto & tuple = value.template safeGet(); const DataTypePtr & value_type = value_types[collection_index]; const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); @@ -169,15 +169,15 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (rhs_which_type.isArray()) { const DataTypeArray * value_array_type = assert_cast(value_type.get()); - size_t value_array_size = value.get().size(); + size_t value_array_size = value.safeGet().size(); DataTypes value_types(value_array_size, value_array_type->getNestedType()); - result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + result_block = createBlockFromCollection(value.safeGet(), value_types, set_element_types, transform_null_in); } else if (rhs_which_type.isTuple()) { const DataTypeTuple * value_tuple_type = assert_cast(value_type.get()); const DataTypes & value_types = value_tuple_type->getElements(); - result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + result_block = createBlockFromCollection(value.safeGet(), value_types, set_element_types, transform_null_in); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index e33880f88e3..7f353028346 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -125,7 +125,7 @@ std::vector BackupSettings::Util::clusterHostIDsFromAST(const IAST & as throw Exception( ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Setting cluster_host_ids has wrong format, must be array of arrays of string literals"); - const auto & replicas = array_of_replicas->value.get(); + const auto & replicas = array_of_replicas->value.safeGet(); res[i].resize(replicas.size()); for (size_t j = 0; j != replicas.size(); ++j) { @@ -134,7 +134,7 @@ std::vector BackupSettings::Util::clusterHostIDsFromAST(const IAST & as throw Exception( ErrorCodes::CANNOT_PARSE_BACKUP_SETTINGS, "Setting cluster_host_ids has wrong format, must be array of arrays of string literals"); - res[i][j] = replica.get(); + res[i][j] = replica.safeGet(); } } } diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 910831195a3..4dcbdcc1617 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -46,8 +46,8 @@ namespace if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) { - String & zookeeper_path_arg = zookeeper_path_ast->value.get(); - String & replica_name_arg = replica_name_ast->value.get(); + String & zookeeper_path_arg = zookeeper_path_ast->value.safeGet(); + String & replica_name_arg = replica_name_ast->value.safeGet(); if (create.uuid != UUIDHelpers::Nil) { String table_uuid_str = toString(create.uuid); diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index 7bbfd9ed751..4662305cdd6 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -31,7 +31,7 @@ namespace { if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create")) { value = RestoreTableCreationMode::kCreate; @@ -54,7 +54,7 @@ namespace if (field.getType() == Field::Types::UInt64) { - UInt64 number = field.get(); + UInt64 number = field.safeGet(); if (number == 1) { value = RestoreTableCreationMode::kCreate; @@ -95,7 +95,7 @@ namespace { if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str == "1" || boost::iequals(str, "true") || boost::iequals(str, "create")) { value = RestoreAccessCreationMode::kCreate; @@ -118,7 +118,7 @@ namespace if (field.getType() == Field::Types::UInt64) { - UInt64 number = field.get(); + UInt64 number = field.safeGet(); if (number == 1) { value = RestoreAccessCreationMode::kCreate; diff --git a/src/Backups/SettingsFieldOptionalString.cpp b/src/Backups/SettingsFieldOptionalString.cpp index 573fd1e052c..684407a533d 100644 --- a/src/Backups/SettingsFieldOptionalString.cpp +++ b/src/Backups/SettingsFieldOptionalString.cpp @@ -19,7 +19,7 @@ SettingFieldOptionalString::SettingFieldOptionalString(const Field & field) if (field.getType() == Field::Types::String) { - value = field.get(); + value = field.safeGet(); return; } diff --git a/src/Backups/SettingsFieldOptionalUUID.cpp b/src/Backups/SettingsFieldOptionalUUID.cpp index 3f14608b206..0011f7f1073 100644 --- a/src/Backups/SettingsFieldOptionalUUID.cpp +++ b/src/Backups/SettingsFieldOptionalUUID.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes if (field.getType() == Field::Types::String) { - const String & str = field.get(); + const String & str = field.safeGet(); if (str.empty()) { value = std::nullopt; diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index f5b700ea529..cc7b37aad52 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -133,7 +133,7 @@ Field QueryFuzzer::fuzzField(Field field) if (type == Field::Types::String) { - auto & str = field.get(); + auto & str = field.safeGet(); UInt64 action = fuzz_rand() % 10; switch (action) { @@ -159,7 +159,7 @@ Field QueryFuzzer::fuzzField(Field field) } else if (type == Field::Types::Array) { - auto & arr = field.get(); + auto & arr = field.safeGet(); if (fuzz_rand() % 5 == 0 && !arr.empty()) { @@ -191,7 +191,7 @@ Field QueryFuzzer::fuzzField(Field field) } else if (type == Field::Types::Tuple) { - auto & arr = field.get(); + auto & arr = field.safeGet(); if (fuzz_rand() % 5 == 0 && !arr.empty()) { @@ -912,17 +912,17 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) auto type = l->value.getType(); if (type == Field::Types::Which::String && fuzz_rand() % 7 == 0) { - String value = l->value.get(); + String value = l->value.safeGet(); child = makeASTFunction( "toFixedString", std::make_shared(value), std::make_shared(static_cast(value.size()))); } else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0) { - child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.get())); + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.safeGet())); } else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0) { - child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.get())); + child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.safeGet())); } else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0) { @@ -930,22 +930,22 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child) if (decimal == 0) child = makeASTFunction( "toDecimal32", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 9))); else if (decimal == 1) child = makeASTFunction( "toDecimal64", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 18))); else if (decimal == 2) child = makeASTFunction( "toDecimal128", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 38))); else child = makeASTFunction( "toDecimal256", - std::make_shared(l->value.get()), + std::make_shared(l->value.safeGet()), std::make_shared(static_cast(fuzz_rand() % 76))); } diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 0188ebc8173..affd620f83a 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -214,7 +214,7 @@ void Suggest::fillWordsFromBlock(const Block & block) Words new_words; new_words.reserve(rows); for (size_t i = 0; i < rows; ++i) - new_words.emplace_back(column[i].get()); + new_words.emplace_back(column[i].safeGet()); addWords(std::move(new_words)); } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index e26fe790a8e..c41c340b069 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -426,9 +426,9 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const Field ColumnAggregateFunction::operator[](size_t n) const { Field field = AggregateFunctionStateData(); - field.get().name = type_string; + field.safeGet().name = type_string; { - WriteBufferFromString buffer(field.get().data); + WriteBufferFromString buffer(field.safeGet().data); func->serialize(data[n], buffer, version); } return field; @@ -436,12 +436,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const void ColumnAggregateFunction::get(size_t n, Field & res) const { - res = AggregateFunctionStateData(); - res.get().name = type_string; - { - WriteBufferFromString buffer(res.get().data); - func->serialize(data[n], buffer, version); - } + res = operator[](n); } StringRef ColumnAggregateFunction::getDataAt(size_t n) const @@ -521,7 +516,7 @@ void ColumnAggregateFunction::insert(const Field & x) "Inserting field of type {} into ColumnAggregateFunction. Expected {}", x.getTypeName(), Field::Types::AggregateFunctionState); - const auto & field_name = x.get().name; + const auto & field_name = x.safeGet().name; if (type_string != field_name) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot insert filed with type {} into column with type {}", field_name, type_string); @@ -529,7 +524,7 @@ void ColumnAggregateFunction::insert(const Field & x) ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().data); + ReadBufferFromString read_buffer(x.safeGet().data); func->deserialize(data.back(), read_buffer, version, &arena); } @@ -538,14 +533,14 @@ bool ColumnAggregateFunction::tryInsert(const DB::Field & x) if (x.getType() != Field::Types::AggregateFunctionState) return false; - const auto & field_name = x.get().name; + const auto & field_name = x.safeGet().name; if (type_string != field_name) return false; ensureOwnership(); Arena & arena = createOrGetArena(); pushBackAndCreateState(data, arena, func.get()); - ReadBufferFromString read_buffer(x.get().data); + ReadBufferFromString read_buffer(x.safeGet().data); func->deserialize(data.back(), read_buffer, version, &arena); return true; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 19cce678cc7..5379adc0bf7 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -141,7 +141,7 @@ void ColumnArray::get(size_t n, Field & res) const size, max_array_size_as_field); res = Array(); - Array & res_arr = res.get(); + Array & res_arr = res.safeGet(); res_arr.reserve(size); for (size_t i = 0; i < size; ++i) @@ -309,7 +309,7 @@ void ColumnArray::updateHashFast(SipHash & hash) const void ColumnArray::insert(const Field & x) { - const Array & array = x.get(); + const Array & array = x.safeGet(); size_t size = array.size(); for (size_t i = 0; i < size; ++i) getData().insert(array[i]); @@ -321,7 +321,7 @@ bool ColumnArray::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Array) return false; - const Array & array = x.get(); + const Array & array = x.safeGet(); size_t size = array.size(); for (size_t i = 0; i < size; ++i) { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 59bfbd2159c..07120f5f035 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -74,7 +74,7 @@ public: void insertData(const char * src, size_t /*length*/) override; void insertDefault() override { data.push_back(T()); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } - void insert(const Field & x) override { data.push_back(x.get()); } + void insert(const Field & x) override { data.push_back(x.safeGet()); } bool tryInsert(const Field & x) override; #if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 0bb3f7edb14..04e894ee5ab 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -59,7 +59,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const void ColumnFixedString::insert(const Field & x) { - const String & s = x.get(); + const String & s = x.safeGet(); insertData(s.data(), s.size()); } @@ -67,7 +67,7 @@ bool ColumnFixedString::tryInsert(const Field & x) { if (x.getType() != Field::Types::Which::String) return false; - const String & s = x.get(); + const String & s = x.safeGet(); if (s.size() > n) return false; insertData(s.data(), s.size()); diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 1025b4e77b9..6a6618bd81e 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -72,7 +72,7 @@ void ColumnMap::get(size_t n, Field & res) const size_t size = offsets[n] - offsets[n - 1]; res = Map(); - auto & map = res.get(); + auto & map = res.safeGet(); map.reserve(size); for (size_t i = 0; i < size; ++i) @@ -96,7 +96,7 @@ void ColumnMap::insertData(const char *, size_t) void ColumnMap::insert(const Field & x) { - const auto & map = x.get(); + const auto & map = x.safeGet(); nested->insert(Array(map.begin(), map.end())); } @@ -105,7 +105,7 @@ bool ColumnMap::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Map) return false; - const auto & map = x.get(); + const auto & map = x.safeGet(); return nested->tryInsert(Array(map.begin(), map.end())); } @@ -288,8 +288,8 @@ void ColumnMap::getExtremes(Field & min, Field & max) const /// Convert result Array fields to Map fields because client expect min and max field to have type Map - Array nested_min_value = nested_min.get(); - Array nested_max_value = nested_max.get(); + Array nested_min_value = nested_min.safeGet(); + Array nested_max_value = nested_max.safeGet(); Map map_min_value(nested_min_value.begin(), nested_min_value.end()); Map map_max_value(nested_max_value.begin(), nested_max_value.end()); diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index a6431007cb6..eb99bb4081b 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -698,7 +698,7 @@ void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback ca void ColumnObject::insert(const Field & field) { - const auto & object = field.get(); + const auto & object = field.safeGet(); HashSet inserted_paths; size_t old_size = size(); @@ -754,7 +754,7 @@ void ColumnObject::get(size_t n, Field & res) const { assert(n < size()); res = Object(); - auto & object = res.get(); + auto & object = res.safeGet(); for (const auto & entry : subcolumns) { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c1012e1e55e..65aff9e6255 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -123,7 +123,7 @@ public: void insert(const Field & x) override { - const String & s = x.get(); + const String & s = x.safeGet(); const size_t old_size = chars.size(); const size_t size_to_append = s.size() + 1; const size_t new_size = old_size + size_to_append; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 4fc3f88a87c..251be8d9986 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -141,7 +141,7 @@ void ColumnTuple::get(size_t n, Field & res) const const size_t tuple_size = columns.size(); res = Tuple(); - Tuple & res_tuple = res.get(); + Tuple & res_tuple = res.safeGet(); res_tuple.reserve(tuple_size); for (size_t i = 0; i < tuple_size; ++i) @@ -169,7 +169,7 @@ void ColumnTuple::insertData(const char *, size_t) void ColumnTuple::insert(const Field & x) { - const auto & tuple = x.get(); + const auto & tuple = x.safeGet(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) @@ -185,7 +185,7 @@ bool ColumnTuple::tryInsert(const Field & x) if (x.getType() != Field::Types::Which::Tuple) return false; - const auto & tuple = x.get(); + const auto & tuple = x.safeGet(); const size_t tuple_size = columns.size(); if (tuple.size() != tuple_size) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 2fe5b635bd2..a5e1ee4b462 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -85,7 +85,7 @@ public: void insertMany(const Field & field, size_t length) override { - data.resize_fill(data.size() + length, static_cast(field.get())); + data.resize_fill(data.size() + length, static_cast(field.safeGet())); } void insertData(const char * pos, size_t) override @@ -235,7 +235,7 @@ public: void insert(const Field & x) override { - data.push_back(static_cast(x.get())); + data.push_back(static_cast(x.safeGet())); } bool tryInsert(const DB::Field & x) override; diff --git a/src/Columns/tests/gtest_column_variant.cpp b/src/Columns/tests/gtest_column_variant.cpp index 25f276b9600..5e481b88409 100644 --- a/src/Columns/tests/gtest_column_variant.cpp +++ b/src/Columns/tests/gtest_column_variant.cpp @@ -108,10 +108,10 @@ void checkColumnVariant1(ColumnVariant * column) ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[3], 1); ASSERT_TRUE(column->isDefaultAt(2) && column->isDefaultAt(4)); - ASSERT_EQ((*column)[0].get(), 42); - ASSERT_EQ((*column)[1].get(), "Hello"); + ASSERT_EQ((*column)[0].safeGet(), 42); + ASSERT_EQ((*column)[1].safeGet(), "Hello"); ASSERT_TRUE((*column)[2].isNull()); - ASSERT_EQ((*column)[3].get(), "World"); + ASSERT_EQ((*column)[3].safeGet(), "World"); ASSERT_TRUE((*column)[4].isNull()); } @@ -209,9 +209,9 @@ TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNulls) ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 1); ASSERT_EQ(offsets[2], 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); - ASSERT_EQ((*column)[2].get(), 2); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); + ASSERT_EQ((*column)[2].safeGet(), 2); } TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrder) @@ -222,9 +222,9 @@ TEST(ColumnVariant, CreateFromDiscriminatorsAndOneFullColumnNoNullsWithLocalOrde ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 1); ASSERT_EQ(offsets[2], 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); - ASSERT_EQ((*column)[2].get(), 2); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); + ASSERT_EQ((*column)[2].safeGet(), 2); ASSERT_EQ(column->localDiscriminatorAt(0), 2); ASSERT_EQ(column->localDiscriminatorAt(1), 2); ASSERT_EQ(column->localDiscriminatorAt(2), 2); @@ -331,9 +331,9 @@ TEST(ColumnVariant, CloneResizedGeneral1) ASSERT_EQ(offsets[0], 0); ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[3], 1); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); - ASSERT_EQ((*resized_column_variant)[1].get(), "Hello"); - ASSERT_EQ((*resized_column_variant)[3].get(), 43); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); + ASSERT_EQ((*resized_column_variant)[1].safeGet(), "Hello"); + ASSERT_EQ((*resized_column_variant)[3].safeGet(), 43); } TEST(ColumnVariant, CloneResizedGeneral2) @@ -367,7 +367,7 @@ TEST(ColumnVariant, CloneResizedGeneral2) ASSERT_EQ(discriminators[2], ColumnVariant::NULL_DISCRIMINATOR); const auto & offsets = resized_column_variant->getOffsets(); ASSERT_EQ(offsets[0], 0); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); } TEST(ColumnVariant, CloneResizedGeneral3) @@ -405,10 +405,10 @@ TEST(ColumnVariant, CloneResizedGeneral3) ASSERT_EQ(offsets[1], 0); ASSERT_EQ(offsets[2], 1); ASSERT_EQ(offsets[3], 1); - ASSERT_EQ((*resized_column_variant)[0].get(), 42); - ASSERT_EQ((*resized_column_variant)[1].get(), "Hello"); - ASSERT_EQ((*resized_column_variant)[2].get(), "World"); - ASSERT_EQ((*resized_column_variant)[3].get(), 43); + ASSERT_EQ((*resized_column_variant)[0].safeGet(), 42); + ASSERT_EQ((*resized_column_variant)[1].safeGet(), "Hello"); + ASSERT_EQ((*resized_column_variant)[2].safeGet(), "World"); + ASSERT_EQ((*resized_column_variant)[3].safeGet(), 43); } MutableColumnPtr createDiscriminators2() @@ -465,7 +465,7 @@ TEST(ColumnVariant, InsertFrom) auto column_from = createVariantColumn2(change_order); column_to->insertFrom(*column_from, 3); ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0); - ASSERT_EQ((*column_to)[5].get(), 43); + ASSERT_EQ((*column_to)[5].safeGet(), 43); } } @@ -478,8 +478,8 @@ TEST(ColumnVariant, InsertRangeFromOneColumnNoNulls) column_to->insertRangeFrom(*column_from, 2, 2); ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(8), 0); - ASSERT_EQ((*column_to)[7].get(), 2); - ASSERT_EQ((*column_to)[8].get(), 3); + ASSERT_EQ((*column_to)[7].safeGet(), 2); + ASSERT_EQ((*column_to)[8].safeGet(), 3); } } @@ -494,9 +494,9 @@ TEST(ColumnVariant, InsertRangeFromGeneral) ASSERT_EQ(column_to->globalDiscriminatorAt(6), ColumnVariant::NULL_DISCRIMINATOR); ASSERT_EQ(column_to->globalDiscriminatorAt(7), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(8), 1); - ASSERT_EQ((*column_to)[5].get(), "Hello"); - ASSERT_EQ((*column_to)[7].get(), 43); - ASSERT_EQ((*column_to)[8].get(), "World"); + ASSERT_EQ((*column_to)[5].safeGet(), "Hello"); + ASSERT_EQ((*column_to)[7].safeGet(), 43); + ASSERT_EQ((*column_to)[8].safeGet(), "World"); } } @@ -509,8 +509,8 @@ TEST(ColumnVariant, InsertManyFrom) column_to->insertManyFrom(*column_from, 3, 2); ASSERT_EQ(column_to->globalDiscriminatorAt(5), 0); ASSERT_EQ(column_to->globalDiscriminatorAt(6), 0); - ASSERT_EQ((*column_to)[5].get(), 43); - ASSERT_EQ((*column_to)[6].get(), 43); + ASSERT_EQ((*column_to)[5].safeGet(), 43); + ASSERT_EQ((*column_to)[6].safeGet(), 43); } } @@ -520,8 +520,8 @@ TEST(ColumnVariant, PopBackOneColumnNoNulls) column->popBack(3); ASSERT_EQ(column->size(), 2); ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 2); - ASSERT_EQ((*column)[0].get(), 0); - ASSERT_EQ((*column)[1].get(), 1); + ASSERT_EQ((*column)[0].safeGet(), 0); + ASSERT_EQ((*column)[1].safeGet(), 1); } TEST(ColumnVariant, PopBackGeneral) @@ -531,8 +531,8 @@ TEST(ColumnVariant, PopBackGeneral) ASSERT_EQ(column->size(), 3); ASSERT_EQ(column->getVariantByLocalDiscriminator(0).size(), 1); ASSERT_EQ(column->getVariantByLocalDiscriminator(1).size(), 1); - ASSERT_EQ((*column)[0].get(), 42); - ASSERT_EQ((*column)[1].get(), "Hello"); + ASSERT_EQ((*column)[0].safeGet(), 42); + ASSERT_EQ((*column)[1].safeGet(), "Hello"); ASSERT_TRUE((*column)[2].isNull()); } @@ -545,8 +545,8 @@ TEST(ColumnVariant, FilterOneColumnNoNulls) filter.push_back(1); auto filtered_column = column->filter(filter, -1); ASSERT_EQ(filtered_column->size(), 2); - ASSERT_EQ((*filtered_column)[0].get(), 0); - ASSERT_EQ((*filtered_column)[1].get(), 2); + ASSERT_EQ((*filtered_column)[0].safeGet(), 0); + ASSERT_EQ((*filtered_column)[1].safeGet(), 2); } TEST(ColumnVariant, FilterGeneral) @@ -562,7 +562,7 @@ TEST(ColumnVariant, FilterGeneral) filter.push_back(0); auto filtered_column = column->filter(filter, -1); ASSERT_EQ(filtered_column->size(), 3); - ASSERT_EQ((*filtered_column)[0].get(), "Hello"); + ASSERT_EQ((*filtered_column)[0].safeGet(), "Hello"); ASSERT_TRUE((*filtered_column)[1].isNull()); ASSERT_TRUE((*filtered_column)[2].isNull()); } @@ -577,9 +577,9 @@ TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls) permutation.push_back(0); auto permuted_column = column->permute(permutation, 3); ASSERT_EQ(permuted_column->size(), 3); - ASSERT_EQ((*permuted_column)[0].get(), 1); - ASSERT_EQ((*permuted_column)[1].get(), 3); - ASSERT_EQ((*permuted_column)[2].get(), 2); + ASSERT_EQ((*permuted_column)[0].safeGet(), 1); + ASSERT_EQ((*permuted_column)[1].safeGet(), 3); + ASSERT_EQ((*permuted_column)[2].safeGet(), 2); auto index = ColumnUInt64::create(); index->getData().push_back(1); @@ -588,9 +588,9 @@ TEST(ColumnVariant, PermuteAndIndexOneColumnNoNulls) index->getData().push_back(0); auto indexed_column = column->index(*index, 3); ASSERT_EQ(indexed_column->size(), 3); - ASSERT_EQ((*indexed_column)[0].get(), 1); - ASSERT_EQ((*indexed_column)[1].get(), 3); - ASSERT_EQ((*indexed_column)[2].get(), 2); + ASSERT_EQ((*indexed_column)[0].safeGet(), 1); + ASSERT_EQ((*indexed_column)[1].safeGet(), 3); + ASSERT_EQ((*indexed_column)[2].safeGet(), 2); } TEST(ColumnVariant, PermuteGeneral) @@ -603,9 +603,9 @@ TEST(ColumnVariant, PermuteGeneral) permutation.push_back(5); auto permuted_column = column->permute(permutation, 4); ASSERT_EQ(permuted_column->size(), 4); - ASSERT_EQ((*permuted_column)[0].get(), 43); - ASSERT_EQ((*permuted_column)[1].get(), "World"); - ASSERT_EQ((*permuted_column)[2].get(), "Hello"); + ASSERT_EQ((*permuted_column)[0].safeGet(), 43); + ASSERT_EQ((*permuted_column)[1].safeGet(), "World"); + ASSERT_EQ((*permuted_column)[2].safeGet(), "Hello"); ASSERT_TRUE((*permuted_column)[3].isNull()); } @@ -618,12 +618,12 @@ TEST(ColumnVariant, ReplicateOneColumnNoNull) offsets.push_back(6); auto replicated_column = column->replicate(offsets); ASSERT_EQ(replicated_column->size(), 6); - ASSERT_EQ((*replicated_column)[0].get(), 1); - ASSERT_EQ((*replicated_column)[1].get(), 1); - ASSERT_EQ((*replicated_column)[2].get(), 1); - ASSERT_EQ((*replicated_column)[3].get(), 2); - ASSERT_EQ((*replicated_column)[4].get(), 2); - ASSERT_EQ((*replicated_column)[5].get(), 2); + ASSERT_EQ((*replicated_column)[0].safeGet(), 1); + ASSERT_EQ((*replicated_column)[1].safeGet(), 1); + ASSERT_EQ((*replicated_column)[2].safeGet(), 1); + ASSERT_EQ((*replicated_column)[3].safeGet(), 2); + ASSERT_EQ((*replicated_column)[4].safeGet(), 2); + ASSERT_EQ((*replicated_column)[5].safeGet(), 2); } TEST(ColumnVariant, ReplicateGeneral) @@ -637,9 +637,9 @@ TEST(ColumnVariant, ReplicateGeneral) offsets.push_back(7); auto replicated_column = column->replicate(offsets); ASSERT_EQ(replicated_column->size(), 7); - ASSERT_EQ((*replicated_column)[0].get(), 42); - ASSERT_EQ((*replicated_column)[1].get(), "Hello"); - ASSERT_EQ((*replicated_column)[2].get(), "Hello"); + ASSERT_EQ((*replicated_column)[0].safeGet(), 42); + ASSERT_EQ((*replicated_column)[1].safeGet(), "Hello"); + ASSERT_EQ((*replicated_column)[2].safeGet(), "Hello"); ASSERT_TRUE((*replicated_column)[3].isNull()); ASSERT_TRUE((*replicated_column)[4].isNull()); ASSERT_TRUE((*replicated_column)[5].isNull()); @@ -657,13 +657,13 @@ TEST(ColumnVariant, ScatterOneColumnNoNulls) selector.push_back(1); auto columns = column->scatter(3, selector); ASSERT_EQ(columns[0]->size(), 2); - ASSERT_EQ((*columns[0])[0].get(), 0); - ASSERT_EQ((*columns[0])[1].get(), 3); + ASSERT_EQ((*columns[0])[0].safeGet(), 0); + ASSERT_EQ((*columns[0])[1].safeGet(), 3); ASSERT_EQ(columns[1]->size(), 2); - ASSERT_EQ((*columns[1])[0].get(), 1); - ASSERT_EQ((*columns[1])[1].get(), 4); + ASSERT_EQ((*columns[1])[0].safeGet(), 1); + ASSERT_EQ((*columns[1])[1].safeGet(), 4); ASSERT_EQ(columns[2]->size(), 1); - ASSERT_EQ((*columns[2])[0].get(), 2); + ASSERT_EQ((*columns[2])[0].safeGet(), 2); } TEST(ColumnVariant, ScatterGeneral) @@ -680,12 +680,12 @@ TEST(ColumnVariant, ScatterGeneral) auto columns = column->scatter(3, selector); ASSERT_EQ(columns[0]->size(), 3); - ASSERT_EQ((*columns[0])[0].get(), 42); - ASSERT_EQ((*columns[0])[1].get(), "Hello"); - ASSERT_EQ((*columns[0])[2].get(), 43); + ASSERT_EQ((*columns[0])[0].safeGet(), 42); + ASSERT_EQ((*columns[0])[1].safeGet(), "Hello"); + ASSERT_EQ((*columns[0])[2].safeGet(), 43); ASSERT_EQ(columns[1]->size(), 2); - ASSERT_EQ((*columns[1])[0].get(), "World"); - ASSERT_EQ((*columns[1])[1].get(), 44); + ASSERT_EQ((*columns[1])[0].safeGet(), "World"); + ASSERT_EQ((*columns[1])[1].safeGet(), 44); ASSERT_EQ(columns[2]->size(), 2); ASSERT_TRUE((*columns[2])[0].isNull()); ASSERT_TRUE((*columns[2])[1].isNull()); diff --git a/src/Columns/tests/gtest_low_cardinality.cpp b/src/Columns/tests/gtest_low_cardinality.cpp index 5e01279b7df..ce16d2cadb1 100644 --- a/src/Columns/tests/gtest_low_cardinality.cpp +++ b/src/Columns/tests/gtest_low_cardinality.cpp @@ -20,13 +20,13 @@ void testLowCardinalityNumberInsert(const DataTypePtr & data_type) Field value; column->get(0, value); - ASSERT_EQ(value.get(), 15); + ASSERT_EQ(value.safeGet(), 15); column->get(1, value); - ASSERT_EQ(value.get(), 20); + ASSERT_EQ(value.safeGet(), 20); column->get(2, value); - ASSERT_EQ(value.get(), 25); + ASSERT_EQ(value.safeGet(), 25); } TEST(ColumnLowCardinality, Insert) diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h index 9f80ff727b4..bcbcc36c67a 100644 --- a/src/Common/CollectionOfDerived.h +++ b/src/Common/CollectionOfDerived.h @@ -168,7 +168,7 @@ private: records.emplace(it, type_idx, item); } - Records::const_iterator getImpl(std::type_index type_idx) const + typename Records::const_iterator getImpl(std::type_index type_idx) const { auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp index 6c1a8496fe6..23263c988c3 100644 --- a/src/Common/FieldBinaryEncoding.cpp +++ b/src/Common/FieldBinaryEncoding.cpp @@ -208,7 +208,7 @@ void FieldVisitorEncodeBinary::operator() (const Map & x, WriteBuffer & buf) con writeVarUInt(size, buf); for (size_t i = 0; i < size; ++i) { - const Tuple & key_and_value = x[i].get(); + const Tuple & key_and_value = x[i].safeGet(); Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[0]); Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[1]); } diff --git a/src/Common/FieldVisitorSum.cpp b/src/Common/FieldVisitorSum.cpp index b825f188586..af9503ac046 100644 --- a/src/Common/FieldVisitorSum.cpp +++ b/src/Common/FieldVisitorSum.cpp @@ -19,7 +19,7 @@ bool FieldVisitorSum::operator() (UInt64 & x) const return x != 0; } -bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.get(); return x != 0; } +bool FieldVisitorSum::operator() (Float64 & x) const { x += rhs.safeGet(); return x != 0; } bool FieldVisitorSum::operator() (Null &) const { diff --git a/src/Common/FieldVisitorSum.h b/src/Common/FieldVisitorSum.h index cbb4c4a1de3..d28676b5093 100644 --- a/src/Common/FieldVisitorSum.h +++ b/src/Common/FieldVisitorSum.h @@ -37,7 +37,7 @@ public: template bool operator() (DecimalField & x) const { - x += rhs.get>(); + x += rhs.safeGet>(); return x.getValue() != T(0); } diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index c4cb4266418..2148bac20d1 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -172,7 +172,7 @@ String FieldVisitorToString::operator() (const Object & x) const String convertFieldToString(const Field & field) { if (field.getType() == Field::Types::Which::String) - return field.get(); + return field.safeGet(); return applyVisitor(FieldVisitorToString(), field); } diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index a26797a687a..92621db5558 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -297,7 +297,7 @@ public: } /// Only inserts the value if key isn't already present - void ALWAYS_INLINE insertIfNotPresent(const Key & x, const Cell::Mapped & value) + void ALWAYS_INLINE insertIfNotPresent(const Key & x, const typename Cell::Mapped & value) { LookupResult it; bool inserted; diff --git a/src/Common/examples/arena_with_free_lists.cpp b/src/Common/examples/arena_with_free_lists.cpp index 6793d567aca..3a1304e2d94 100644 --- a/src/Common/examples/arena_with_free_lists.cpp +++ b/src/Common/examples/arena_with_free_lists.cpp @@ -174,19 +174,19 @@ struct Dictionary { switch (attribute.type) { - case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; - case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; + case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.safeGet()); break; + case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.safeGet(); break; case AttributeUnderlyingTypeTest::String: { - const auto & string = value.get(); + const auto & string = value.safeGet(); auto & string_ref = std::get>(attribute.arrays)[idx]; const auto & null_value_ref = std::get(attribute.null_values); diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index af681cd5639..c2bcf6ec651 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -49,7 +49,7 @@ ExternalTableDataPtr BaseExternalTable::getData(ContextPtr context) { initReadBuffer(); initSampleBlock(); - auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").get()); + auto input = context->getInputFormat(format, *read_buffer, sample_block, context->getSettingsRef().get("max_block_size").safeGet()); auto data = std::make_unique(); data->pipe = std::make_unique(); diff --git a/src/Core/Field.h b/src/Core/Field.h index f1bb4a72b0d..689ac38a235 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -457,15 +457,6 @@ public: std::string_view getTypeName() const; bool isNull() const { return which == Types::Null; } - template - NearestFieldType> & get(); - - template - const auto & get() const - { - auto * mutable_this = const_cast *>(this); - return mutable_this->get(); - } bool isNegativeInfinity() const { return which == Types::Null && get().isNegativeInfinity(); } bool isPositiveInfinity() const { return which == Types::Null && get().isPositiveInfinity(); } @@ -681,6 +672,25 @@ private: Types::Which which; + /// This function is prone to type punning and should never be used outside of Field class, + /// whenever it is used within this class the stored type should be checked in advance. + template + NearestFieldType> & get() + { + // Before storing the value in the Field, we static_cast it to the field + // storage type, so here we return the value of storage type as well. + // Otherwise, it is easy to make a mistake of reinterpret_casting the stored + // value to a different and incompatible type. + // For example, a Float32 value is stored as Float64, and it is incorrect to + // return a reference to this value as Float32. + return *reinterpret_cast>*>(&storage); + } + + template + NearestFieldType> & get() const + { + return const_cast(this)->get(); + } /// Assuming there was no allocated state or it was deallocated (see destroy). template @@ -859,55 +869,18 @@ constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) || t == Field::Types::UInt64; } -constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) -{ - return t == Field::Types::Int64 - || t == Field::Types::UInt64 - || t == Field::Types::Bool; -} - -// Field value getter with type checking in debug builds. -template -NearestFieldType> & Field::get() -{ - // Before storing the value in the Field, we static_cast it to the field - // storage type, so here we return the value of storage type as well. - // Otherwise, it is easy to make a mistake of reinterpret_casting the stored - // value to a different and incompatible type. - // For example, a Float32 value is stored as Float64, and it is incorrect to - // return a reference to this value as Float32. - using StoredType = NearestFieldType>; - -#ifndef NDEBUG - // Disregard signedness when converting between int64 types. - constexpr Field::Types::Which target = TypeToEnum::value; - if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which)) && target != Field::Types::IPv4) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Invalid Field get from type {} to type {}", which, target); -#endif - - StoredType * MAY_ALIAS ptr = reinterpret_cast(&storage); - - return *ptr; -} - - template auto & Field::safeGet() { const Types::Which target = TypeToEnum>>::value; - /// We allow converting int64 <-> uint64, int64 <-> bool, uint64 <-> bool in safeGet(). - if (target != which - && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) - throw Exception(ErrorCodes::BAD_GET, - "Bad get: has {}, requested {}", getTypeName(), target); + /// bool is stored as uint64, will be returned as UInt64 when requested as bool or UInt64, as Int64 when requested as Int64 + if (target != which && !(which == Field::Types::Bool && (target == Field::Types::UInt64 || target == Field::Types::Int64))) + throw Exception(ErrorCodes::BAD_GET, "Bad get: has {}, requested {}", getTypeName(), target); return get(); } - template requires not_field_or_bool_or_stringlike Field::Field(T && rhs) diff --git a/src/Core/Range.cpp b/src/Core/Range.cpp index 956b96653a1..1a5ce1e012e 100644 --- a/src/Core/Range.cpp +++ b/src/Core/Range.cpp @@ -62,27 +62,27 @@ void Range::shrinkToIncludedIfPossible() { if (left.isExplicit() && !left_included) { - if (left.getType() == Field::Types::UInt64 && left.get() != std::numeric_limits::max()) + if (left.getType() == Field::Types::UInt64 && left.safeGet() != std::numeric_limits::max()) { - ++left.get(); + ++left.safeGet(); left_included = true; } - if (left.getType() == Field::Types::Int64 && left.get() != std::numeric_limits::max()) + if (left.getType() == Field::Types::Int64 && left.safeGet() != std::numeric_limits::max()) { - ++left.get(); + ++left.safeGet(); left_included = true; } } if (right.isExplicit() && !right_included) { - if (right.getType() == Field::Types::UInt64 && right.get() != std::numeric_limits::min()) + if (right.getType() == Field::Types::UInt64 && right.safeGet() != std::numeric_limits::min()) { - --right.get(); + --right.safeGet(); right_included = true; } - if (right.getType() == Field::Types::Int64 && right.get() != std::numeric_limits::min()) + if (right.getType() == Field::Types::Int64 && right.safeGet() != std::numeric_limits::min()) { - --right.get(); + --right.safeGet(); right_included = true; } } diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 9c9c9c1db00..45bd2b9eb42 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -118,7 +118,7 @@ void Settings::set(std::string_view name, const Field & value) { if (value.getType() != Field::Types::Which::String) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName()); - applyCompatibilitySetting(value.get()); + applyCompatibilitySetting(value.safeGet()); } /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 7d094e2a107..bb2ef58b214 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -53,29 +53,29 @@ namespace { if (f.getType() == Field::Types::String) { - return stringToNumber(f.get()); + return stringToNumber(f.safeGet()); } else if (f.getType() == Field::Types::UInt64) { T result; - if (!accurate::convertNumeric(f.get(), result)) + if (!accurate::convertNumeric(f.safeGet(), result)) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Field value {} is out of range of {} type", f, demangle(typeid(T).name())); return result; } else if (f.getType() == Field::Types::Int64) { T result; - if (!accurate::convertNumeric(f.get(), result)) + if (!accurate::convertNumeric(f.safeGet(), result)) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Field value {} is out of range of {} type", f, demangle(typeid(T).name())); return result; } else if (f.getType() == Field::Types::Bool) { - return T(f.get()); + return T(f.safeGet()); } else if (f.getType() == Field::Types::Float64) { - Float64 x = f.get(); + Float64 x = f.safeGet(); if constexpr (std::is_floating_point_v) { return T(x); @@ -120,7 +120,7 @@ namespace if (f.getType() == Field::Types::String) { /// Allow to parse Map from string field. For the convenience. - const auto & str = f.get(); + const auto & str = f.safeGet(); return stringToMap(str); } @@ -218,7 +218,7 @@ namespace UInt64 fieldToMaxThreads(const Field & f) { if (f.getType() == Field::Types::String) - return stringToMaxThreads(f.get()); + return stringToMaxThreads(f.safeGet()); else return fieldToNumber(f); } diff --git a/src/Core/SettingsQuirks.cpp b/src/Core/SettingsQuirks.cpp index 5541cc19653..3127a5ef36d 100644 --- a/src/Core/SettingsQuirks.cpp +++ b/src/Core/SettingsQuirks.cpp @@ -100,7 +100,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) return current_value; }; - UInt64 max_threads = get_current_value("max_threads").get(); + UInt64 max_threads = get_current_value("max_threads").safeGet(); UInt64 max_threads_max_value = 256 * getNumberOfPhysicalCPUCores(); if (max_threads > max_threads_max_value) { @@ -120,7 +120,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) "input_format_parquet_max_block_size"}; for (auto const & setting : block_rows_settings) { - if (auto block_size = get_current_value(setting).get(); + if (auto block_size = get_current_value(setting).safeGet(); block_size > max_sane_block_rows_size) { if (log) @@ -129,7 +129,7 @@ void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log) } } - if (auto max_block_size = get_current_value("max_block_size").get(); max_block_size == 0) + if (auto max_block_size = get_current_value("max_block_size").safeGet(); max_block_size == 0) { if (log) LOG_WARNING(log, "Sanity check: 'max_block_size' cannot be 0. Set to default value {}", DEFAULT_BLOCK_SIZE); diff --git a/src/Core/examples/field.cpp b/src/Core/examples/field.cpp index 110e11d0cb1..3064290e127 100644 --- a/src/Core/examples/field.cpp +++ b/src/Core/examples/field.cpp @@ -37,7 +37,7 @@ int main(int argc, char ** argv) std::cerr << applyVisitor(to_string, field) << std::endl; } - field.get().push_back(field); + field.safeGet().push_back(field); std::cerr << applyVisitor(to_string, field) << std::endl; std::cerr << (field < field2) << std::endl; diff --git a/src/Core/tests/gtest_field.cpp b/src/Core/tests/gtest_field.cpp index 5585442d835..7e778be9575 100644 --- a/src/Core/tests/gtest_field.cpp +++ b/src/Core/tests/gtest_field.cpp @@ -8,31 +8,31 @@ GTEST_TEST(Field, FromBool) { Field f{false}; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 0); - ASSERT_EQ(f.get(), false); + ASSERT_EQ(f.safeGet(), 0); + ASSERT_EQ(f.safeGet(), false); } { Field f{true}; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 1); - ASSERT_EQ(f.get(), true); + ASSERT_EQ(f.safeGet(), 1); + ASSERT_EQ(f.safeGet(), true); } { Field f; f = false; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 0); - ASSERT_EQ(f.get(), false); + ASSERT_EQ(f.safeGet(), 0); + ASSERT_EQ(f.safeGet(), false); } { Field f; f = true; ASSERT_EQ(f.getType(), Field::Types::Bool); - ASSERT_EQ(f.get(), 1); - ASSERT_EQ(f.get(), true); + ASSERT_EQ(f.safeGet(), 1); + ASSERT_EQ(f.safeGet(), true); } } @@ -42,15 +42,15 @@ GTEST_TEST(Field, Move) Field f; f = Field{String{"Hello, world (1)"}}; - ASSERT_EQ(f.get(), "Hello, world (1)"); + ASSERT_EQ(f.safeGet(), "Hello, world (1)"); f = Field{String{"Hello, world (2)"}}; - ASSERT_EQ(f.get(), "Hello, world (2)"); + ASSERT_EQ(f.safeGet(), "Hello, world (2)"); f = Field{Array{Field{String{"Hello, world (3)"}}}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (3)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (3)"); f = String{"Hello, world (4)"}; - ASSERT_EQ(f.get(), "Hello, world (4)"); + ASSERT_EQ(f.safeGet(), "Hello, world (4)"); f = Array{Field{String{"Hello, world (5)"}}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (5)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (5)"); f = Array{String{"Hello, world (6)"}}; - ASSERT_EQ(f.get()[0].get(), "Hello, world (6)"); + ASSERT_EQ(f.safeGet()[0].safeGet(), "Hello, world (6)"); } diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 09175617bf1..dbb713bc382 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -119,7 +119,7 @@ MutableColumnPtr DataTypeAggregateFunction::createColumn() const Field DataTypeAggregateFunction::getDefault() const { Field field = AggregateFunctionStateData(); - field.get().name = getName(); + field.safeGet().name = getName(); AlignedBuffer place_buffer(function->sizeOfData(), function->alignOfData()); AggregateDataPtr place = place_buffer.data(); @@ -128,7 +128,7 @@ Field DataTypeAggregateFunction::getDefault() const try { - WriteBufferFromString buffer_from_field(field.get().data); + WriteBufferFromString buffer_from_field(field.safeGet().data); function->serialize(place, buffer_from_field, version); } catch (...) diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index a1b1f8325f0..e0430272479 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -71,10 +71,10 @@ static DataTypePtr create(const ASTPtr & arguments) auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS) + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.safeGet() == 0 || literal->value.safeGet() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); - return std::make_shared(literal->value.get()); + return std::make_shared(literal->value.safeGet()); } void registerDataTypeDynamic(DataTypeFactory & factory) diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index 08e0c0d2045..b9a5a1a5a68 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -122,12 +122,12 @@ Field DataTypeEnum::castToName(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - this->getValue(value_or_name.get()); /// Check correctness - return value_or_name.get(); + this->getValue(value_or_name.safeGet()); /// Check correctness + return value_or_name.safeGet(); } else if (value_or_name.getType() == Field::Types::Int64) { - Int64 value = value_or_name.get(); + Int64 value = value_or_name.safeGet(); checkOverflow(value); return this->getNameForValue(static_cast(value)).toString(); } @@ -141,12 +141,12 @@ Field DataTypeEnum::castToValue(const Field & value_or_name) const { if (value_or_name.getType() == Field::Types::String) { - return this->getValue(value_or_name.get()); + return this->getValue(value_or_name.safeGet()); } else if (value_or_name.getType() == Field::Types::Int64 || value_or_name.getType() == Field::Types::UInt64) { - Int64 value = value_or_name.get(); + Int64 value = value_or_name.safeGet(); checkOverflow(value); this->getNameForValue(static_cast(value)); /// Check correctness return value; @@ -220,7 +220,7 @@ static void autoAssignNumberForEnum(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - literal_child_assign_num = value_literal->value.get(); + literal_child_assign_num = value_literal->value.safeGet(); } assign_number_child.emplace_back(child); } @@ -269,8 +269,8 @@ static DataTypePtr createExact(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - const String & field_name = name_literal->value.get(); - const auto value = value_literal->value.get(); + const String & field_name = name_literal->value.safeGet(); + const auto value = value_literal->value.safeGet(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value {} for element '{}' exceeds range of {}", @@ -302,7 +302,7 @@ static DataTypePtr create(const ASTPtr & arguments) "Elements of Enum data type must be of form: " "'name' = number or 'name', where name is string literal and number is an integer"); - Int64 value = value_literal->value.get(); + Int64 value = value_literal->value.safeGet(); if (value > std::numeric_limits::max() || value < std::numeric_limits::min()) return createExact(arguments); diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index 080ff8826a5..63d5245287f 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -51,11 +51,11 @@ static DataTypePtr create(const ASTPtr & arguments) "FixedString data type family must have exactly one argument - size in bytes"); const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.safeGet() == 0) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "FixedString data type family must have a number (positive integer) as its argument"); - return std::make_shared(argument->value.get()); + return std::make_shared(argument->value.safeGet()); } diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 91b9bfcb2a5..5fd69688194 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -69,7 +69,7 @@ static DataTypePtr create(const ASTPtr & arguments) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Object data type family must have a const string as its schema name parameter"); - return std::make_shared(literal->value.get(), is_nullable); + return std::make_shared(literal->value.safeGet(), is_nullable); } void registerDataTypeObject(DataTypeFactory & factory) diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index a427fd0717a..1d8f7711de1 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -80,14 +80,14 @@ static DataTypePtr create(const ASTPtr & arguments) const auto * precision_arg = arguments->children[0]->as(); if (!precision_arg || precision_arg->value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument precision is invalid"); - precision = precision_arg->value.get(); + precision = precision_arg->value.safeGet(); if (arguments->children.size() == 2) { const auto * scale_arg = arguments->children[1]->as(); if (!scale_arg || !isInt64OrUInt64FieldType(scale_arg->value.getType())) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal argument scale is invalid"); - scale = scale_arg->value.get(); + scale = scale_arg->value.safeGet(); } } @@ -107,7 +107,7 @@ static DataTypePtr createExact(const ASTPtr & arguments) "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have a one number as its argument"); UInt64 precision = DecimalUtils::max_precision; - UInt64 scale = scale_arg->value.get(); + UInt64 scale = scale_arg->value.safeGet(); return createDecimal(precision, scale); } diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 1d525e5987f..a7f2ff83873 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -104,7 +104,7 @@ Array createEmptyArrayField(size_t num_dimensions) for (size_t i = 1; i < num_dimensions; ++i) { current_array->push_back(Array()); - current_array = ¤t_array->back().get(); + current_array = ¤t_array->back().safeGet(); } return array; diff --git a/src/DataTypes/Serializations/JSONDataParser.cpp b/src/DataTypes/Serializations/JSONDataParser.cpp index 56641424396..0f74815f5b4 100644 --- a/src/DataTypes/Serializations/JSONDataParser.cpp +++ b/src/DataTypes/Serializations/JSONDataParser.cpp @@ -131,7 +131,7 @@ void JSONDataParser::traverseArrayElement(const Element & element, P auto nested_hash = getHashOfNestedPath(paths[i], values[i]); if (nested_hash) { - size_t array_size = values[i].template get().size(); + size_t array_size = values[i].template safeGet().size(); auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash]; if (current_nested_sizes.size() == ctx.current_size) @@ -154,7 +154,7 @@ void JSONDataParser::traverseArrayElement(const Element & element, P auto nested_hash = getHashOfNestedPath(paths[i], values[i]); if (nested_hash) { - size_t array_size = values[i].template get().size(); + size_t array_size = values[i].template safeGet().size(); auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash]; if (current_nested_sizes.empty()) diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 55f7641e058..41b198890e4 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -16,14 +16,14 @@ namespace DB void SerializationAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - const AggregateFunctionStateData & state = field.get(); + const AggregateFunctionStateData & state = field.safeGet(); writeBinary(state.data, ostr); } void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = AggregateFunctionStateData(); - AggregateFunctionStateData & s = field.get(); + AggregateFunctionStateData & s = field.safeGet(); readBinary(s.data, istr); s.name = type_name; } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index b7d43332085..0a9c4529e23 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -29,7 +29,7 @@ static constexpr size_t MAX_ARRAYS_SIZE = 1ULL << 40; void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const Array & a = field.get(); + const Array & a = field.safeGet(); writeVarUInt(a.size(), ostr); for (const auto & i : a) { @@ -51,7 +51,7 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, con settings.binary.max_binary_string_size); field = Array(); - Array & arr = field.get(); + Array & arr = field.safeGet(); arr.reserve(size); for (size_t i = 0; i < size; ++i) nested->deserializeBinary(arr.emplace_back(), istr, settings); diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/src/DataTypes/Serializations/SerializationDecimalBase.cpp index 49dc042e872..8927f949368 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.cpp +++ b/src/DataTypes/Serializations/SerializationDecimalBase.cpp @@ -13,7 +13,7 @@ namespace DB template void SerializationDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - FieldType x = field.get>(); + FieldType x = field.safeGet>(); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index f919dc16d33..688c71792fa 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -28,7 +28,7 @@ static constexpr size_t MAX_STRINGS_SIZE = 1ULL << 30; void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - const String & s = field.get(); + const String & s = field.safeGet(); ostr.write(s.data(), std::min(s.size(), n)); if (s.size() < n) for (size_t i = s.size(); i < n; ++i) @@ -39,7 +39,7 @@ void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = String(); - String & s = field.get(); + String & s = field.safeGet(); s.resize(n); istr.readStrict(s.data(), n); } diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp index dfcd24aff58..c1beceb4533 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp @@ -125,7 +125,7 @@ bool SerializationIP::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadB template void SerializationIP::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - IPv x = field.get(); + IPv x = field.safeGet(); if constexpr (std::is_same_v) writeBinary(x, ostr); else diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 0bef3c7d79d..c722b3ac7a1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -40,7 +40,7 @@ static IColumn & extractNestedColumn(IColumn & column) void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & map = field.get(); + const auto & map = field.safeGet(); writeVarUInt(map.size(), ostr); for (const auto & elem : map) { @@ -63,7 +63,7 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const size, settings.binary.max_binary_string_size); field = Map(); - Map & map = field.get(); + Map & map = field.safeGet(); map.reserve(size); for (size_t i = 0; i < size; ++i) { diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp b/src/DataTypes/Serializations/SerializationNumber.cpp index bdb4dfc6735..bfc13af8ca3 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -169,7 +169,7 @@ template void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { /// ColumnVector::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64 - typename ColumnVector::ValueType x = static_cast::ValueType>(field.get()); + typename ColumnVector::ValueType x = static_cast::ValueType>(field.safeGet()); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 9e523d0d745..ac5d4e3e128 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const String & s = field.get(); + const String & s = field.safeGet(); if (settings.binary.max_binary_string_size && s.size() > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, @@ -59,7 +59,7 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co settings.binary.max_binary_string_size); field = String(); - String & s = field.get(); + String & s = field.safeGet(); s.resize(size); istr.readStrict(s.data(), size); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 7a5227ca752..594a23ab507 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -34,7 +34,7 @@ static inline const IColumn & extractElementColumn(const IColumn & column, size_ void SerializationTuple::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & tuple = field.get(); + const auto & tuple = field.safeGet(); for (size_t element_index = 0; element_index < elems.size(); ++element_index) { const auto & serialization = elems[element_index]; @@ -47,7 +47,7 @@ void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr, con const size_t size = elems.size(); field = Tuple(); - Tuple & tuple = field.get(); + Tuple & tuple = field.safeGet(); tuple.reserve(size); for (size_t i = 0; i < size; ++i) elems[i]->deserializeBinary(tuple.emplace_back(), istr, settings); diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 58178a896dc..f18466ad8ad 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -137,7 +137,7 @@ bool SerializationUUID::tryDeserializeTextCSV(IColumn & column, ReadBuffer & ist void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { - UUID x = field.get(); + UUID x = field.safeGet(); writeBinaryLittleEndian(x, ostr); } diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 84a52d4affb..9a632bd381b 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -55,7 +55,7 @@ getArgument(const ASTPtr & arguments, size_t argument_index, const char * argume } } - return argument->value.get(); + return argument->value.safeGet(); } static DataTypePtr create(const ASTPtr & arguments) diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 67bce915168..b91aa84ecd3 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -183,7 +183,7 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction if (name->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(name->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(name->value.safeGet()); if (!maybe_qualified_name) return; @@ -194,7 +194,7 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction if (literal->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.safeGet()); /// Just return if name if invalid if (!maybe_qualified_name) return; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 38e100e2470..7556223b30e 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -180,7 +180,7 @@ namespace if (database_name_field && table_name_field) { - QualifiedTableName qualified_name{database_name_field->get(), table_name_field->get()}; + QualifiedTableName qualified_name{database_name_field->safeGet(), table_name_field->safeGet()}; if (!qualified_name.database.empty() && !qualified_name.table.empty()) { auto new_qualified_name = data.renaming_map.getNewTableName(qualified_name); @@ -207,7 +207,7 @@ namespace if (literal->value.getType() != Field::Types::String) return; - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.safeGet()); /// Just return if name if invalid if (!maybe_qualified_name || maybe_qualified_name->database.empty() || maybe_qualified_name->table.empty()) return; @@ -247,7 +247,7 @@ namespace if (!literal || (literal->value.getType() != Field::Types::String)) return; - auto database_name = literal->value.get(); + auto database_name = literal->value.safeGet(); if (database_name.empty()) return; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f127ccbc224..602bb29aebc 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -802,8 +802,8 @@ void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStora if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) return; - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); + String maybe_path = arg1->value.safeGet(); + String maybe_replica = arg2->value.safeGet(); /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. /// Let's ensure that some macros are used. diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 04b4070d5af..1364e9ae2b2 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -736,11 +736,11 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) - casted_column->insertValue(static_cast(value.template get())); + casted_column->insertValue(static_cast(value.template safeGet())); } }; @@ -776,17 +776,17 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) { if (value.getType() == Field::Types::UInt64) - casted_int32_column->insertValue(static_cast(value.get())); + casted_int32_column->insertValue(static_cast(value.safeGet())); else if (value.getType() == Field::Types::Int64) { /// For MYSQL_TYPE_INT24 - const Int32 & num = static_cast(value.get()); + const Int32 & num = static_cast(value.safeGet()); casted_int32_column->insertValue(num & 0x800000 ? num | 0xFF000000 : num); } else @@ -798,7 +798,7 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) @@ -812,12 +812,12 @@ static void writeFieldsToColumn( { for (size_t index = 0; index < rows_data.size(); ++index) { - const Tuple & row_data = rows_data[index].get(); + const Tuple & row_data = rows_data[index].safeGet(); const Field & value = row_data[column_index]; if (write_data_to_null_map(value, index)) { - const String & data = value.get(); + const String & data = value.safeGet(); casted_fixed_string_column->insertData(data.data(), data.size()); } } @@ -864,7 +864,7 @@ static inline size_t onUpdateData(const Row & rows_data, Block & buffer, size_t { writeable_rows_mask[index + 1] = true; writeable_rows_mask[index] = differenceSortingKeys( - rows_data[index].get(), rows_data[index + 1].get(), sorting_columns_index); + rows_data[index].safeGet(), rows_data[index + 1].safeGet(), sorting_columns_index); } for (size_t column = 0; column < buffer.columns() - 2; ++column) diff --git a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp index 11299c5b8b1..6f1ba26ee33 100644 --- a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp +++ b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp @@ -281,12 +281,12 @@ static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) ASSERT_EQ(write_event->table, "a"); ASSERT_EQ(write_event->rows.size(), 1); ASSERT_EQ(write_event->rows[0].getType(), Field::Types::Tuple); - auto row_data = write_event->rows[0].get(); + auto row_data = write_event->rows[0].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 1u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 1u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); ++count; @@ -342,18 +342,18 @@ static void testFile1(IBinlog & binlog, UInt64 timeout, bool filtered = false) ASSERT_EQ(update_event->table, "a"); ASSERT_EQ(update_event->rows.size(), 2); ASSERT_EQ(update_event->rows[0].getType(), Field::Types::Tuple); - row_data = update_event->rows[0].get(); + row_data = update_event->rows[0].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 1u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); - row_data = update_event->rows[1].get(); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 1u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); + row_data = update_event->rows[1].safeGet(); ASSERT_EQ(row_data.size(), 4u); - ASSERT_EQ(row_data[0].get(), 1u); - ASSERT_EQ(row_data[1].get(), 2u); - ASSERT_EQ(row_data[2].get(), 1u); - ASSERT_EQ(row_data[3].get(), 1u); + ASSERT_EQ(row_data[0].safeGet(), 1u); + ASSERT_EQ(row_data[1].safeGet(), 2u); + ASSERT_EQ(row_data[2].safeGet(), 1u); + ASSERT_EQ(row_data[3].safeGet(), 1u); ASSERT_TRUE(binlog.tryReadEvent(event, timeout)); ++count; diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 47f99bd1093..781822533e9 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -395,13 +395,13 @@ private: } else if constexpr (std::is_same_v) { - const String & string_value = column_value.get(); + const String & string_value = column_value.safeGet(); StringRef inserted_value = copyStringInArena(arena, string_value); container.back() = inserted_value; } else { - container.back() = static_cast(column_value.get()); + container.back() = static_cast(column_value.safeGet()); } }); } @@ -441,7 +441,7 @@ private: } else if constexpr (std::is_same_v) { - const String & string_value = column_value.get(); + const String & string_value = column_value.safeGet(); StringRef inserted_value = copyStringInArena(arena, string_value); if (!cell_was_default) @@ -454,7 +454,7 @@ private: } else { - container[index_to_use] = static_cast(column_value.get()); + container[index_to_use] = static_cast(column_value.safeGet()); } }); } @@ -651,12 +651,12 @@ private: } else if constexpr (std::is_same_v) { - auto & value = default_value.get(); + auto & value = default_value.safeGet(); value_setter(value); } else { - value_setter(default_value.get()); + value_setter(default_value.safeGet()); } } else diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 64fc05e99ab..43fd39640c3 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -345,7 +345,7 @@ public: if (attribute_default_value.isNull()) default_value_is_null = true; else - default_value = static_cast(attribute_default_value.get()); + default_value = static_cast(attribute_default_value.safeGet()); } else { @@ -377,7 +377,7 @@ public: if constexpr (std::is_same_v) { Field field = (*default_values_column)[row]; - return field.get(); + return field.safeGet(); } else if constexpr (std::is_same_v) return default_values_column->getDataAt(row); diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 999160226d9..b0233766741 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -245,7 +245,7 @@ ColumnPtr FlatDictionary::getHierarchy(ColumnPtr key_column, const DataTypePtr & std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -300,7 +300,7 @@ ColumnUInt8::Ptr FlatDictionary::isInHierarchy( std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const ContainerType & parent_keys = std::get>(hierarchical_attribute.container); @@ -701,7 +701,7 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const UInt64 key, return; } - auto & attribute_value = value.get(); + auto & attribute_value = value.safeGet(); auto & container = std::get>(attribute.container); loaded_keys[key] = true; diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d7d50dfb0a6..8768be8e5ec 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -240,7 +240,7 @@ ColumnPtr HashedArrayDictionary::getHierarchy(Colu std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); auto is_key_valid_func = [&, this](auto & key) @@ -313,7 +313,7 @@ ColumnUInt8::Ptr HashedArrayDictionary::isInHierar std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); auto is_key_valid_func = [&](auto & key) @@ -581,13 +581,13 @@ void HashedArrayDictionary::blockToAttributes(cons if constexpr (std::is_same_v) { - String & value_to_insert = column_value_to_insert.get(); + String & value_to_insert = column_value_to_insert.safeGet(); StringRef string_in_arena_reference = copyStringInArena(*string_arenas[shard], value_to_insert); attribute_container.back() = string_in_arena_reference; } else { - auto value_to_insert = static_cast(column_value_to_insert.get()); + auto value_to_insert = static_cast(column_value_to_insert.safeGet()); attribute_container.back() = value_to_insert; } }; diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 3a2b61e5149..7e935fe4855 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -636,7 +636,7 @@ ColumnPtr HashedDictionary::getHierarchy(C std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const CollectionsHolder & child_key_to_parent_key_maps = std::get>(hierarchical_attribute.containers); @@ -710,7 +710,7 @@ ColumnUInt8::Ptr HashedDictionary::isInHie std::optional null_value; if (!dictionary_attribute.null_value.isNull()) - null_value = dictionary_attribute.null_value.get(); + null_value = dictionary_attribute.null_value.safeGet(); const CollectionsHolder & child_key_to_parent_key_maps = std::get>(hierarchical_attribute.containers); @@ -1004,13 +1004,13 @@ void HashedDictionary::blockToAttributes(c if constexpr (std::is_same_v) { - String & value_to_insert = column_value_to_insert.get(); + String & value_to_insert = column_value_to_insert.safeGet(); StringRef arena_value = copyStringInArena(*string_arenas[shard], value_to_insert); container.insert({key, arena_value}); } else { - auto value_to_insert = static_cast(column_value_to_insert.get()); + auto value_to_insert = static_cast(column_value_to_insert.safeGet()); container.insert({key, value_to_insert}); } diff --git a/src/Dictionaries/HierarchyDictionariesUtils.cpp b/src/Dictionaries/HierarchyDictionariesUtils.cpp index e1119982a34..de532ade26d 100644 --- a/src/Dictionaries/HierarchyDictionariesUtils.cpp +++ b/src/Dictionaries/HierarchyDictionariesUtils.cpp @@ -50,7 +50,7 @@ namespace std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); ColumnPtr key_to_request_column = ColumnVector::create(); auto * key_to_request_column_typed = static_cast *>(key_to_request_column->assumeMutable().get()); @@ -190,7 +190,7 @@ ColumnPtr getKeysHierarchyDefaultImplementation( std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); auto get_parent_key_func = [&](auto & key) { @@ -252,7 +252,7 @@ ColumnUInt8::Ptr getKeysIsInHierarchyDefaultImplementation( std::optional null_value; if (!hierarchical_attribute.null_value.isNull()) - null_value = hierarchical_attribute.null_value.get(); + null_value = hierarchical_attribute.null_value.safeGet(); auto get_parent_key_func = [&](auto & key) { diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 41fafcc162b..4f9e991752f 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -613,14 +613,14 @@ void IPAddressDictionary::calculateBytesAllocated() template void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = null_value.isNull() ? T{} : T(null_value.get()); + attribute.null_values = null_value.isNull() ? T{} : T(null_value.safeGet()); attribute.maps.emplace>(); } template <> void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) { - attribute.null_values = null_value.isNull() ? String() : null_value.get(); + attribute.null_values = null_value.isNull() ? String() : null_value.safeGet(); attribute.maps.emplace>(); attribute.string_arena = std::make_unique(); } @@ -976,13 +976,13 @@ void IPAddressDictionary::setAttributeValue(Attribute & attribute, const Field & if constexpr (std::is_same_v) { - const auto & string = value.get(); + const auto & string = value.safeGet(); const auto * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); setAttributeValueImpl(attribute, StringRef{string_in_arena, string.size()}); } else { - setAttributeValueImpl(attribute, static_cast(value.get())); + setAttributeValueImpl(attribute, static_cast(value.safeGet())); } }; diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 46910fa9f6a..c30a6f90e44 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,7 +1,6 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "registerDictionaries.h" #include #include @@ -233,7 +232,7 @@ QueryPipeline MongoDBDictionarySource::loadKeys(const Columns & key_columns, con } case AttributeUnderlyingType::String: { - String loaded_str((*key_columns[attribute_index])[row_idx].get()); + String loaded_str((*key_columns[attribute_index])[row_idx].safeGet()); /// Convert string to ObjectID if (key_attribute.is_object_id) { diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index dfc920623e3..ff29ca1f6b8 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -141,7 +141,7 @@ ColumnPtr IPolygonDictionary::getColumn( { getItemsShortCircuitImpl( requested_key_points, - [&](size_t row) { return (*attribute_values_column)[row].get(); }, + [&](size_t row) { return (*attribute_values_column)[row].safeGet(); }, [&](Array & value) { result_column_typed.insert(value); }, default_mask.value()); } @@ -149,7 +149,7 @@ ColumnPtr IPolygonDictionary::getColumn( { getItemsImpl( requested_key_points, - [&](size_t row) { return (*attribute_values_column)[row].get(); }, + [&](size_t row) { return (*attribute_values_column)[row].safeGet(); }, [&](Array & value) { result_column_typed.insert(value); }, default_value_provider.value()); } @@ -432,16 +432,16 @@ void IPolygonDictionary::getItemsImpl( } else if constexpr (std::is_same_v) { - set_value(default_value.get()); + set_value(default_value.safeGet()); } else if constexpr (std::is_same_v) { - auto default_value_string = default_value.get(); + auto default_value_string = default_value.safeGet(); set_value(default_value_string); } else { - set_value(default_value.get>()); + set_value(default_value.safeGet>()); } } } diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index fc6c98990d0..c264b480bcb 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -906,13 +906,13 @@ void RangeHashedDictionary::setAttributeValue(Attribute & a if constexpr (std::is_same_v) { - const auto & string = value.get(); + const auto & string = value.safeGet(); StringRef string_ref = copyStringInArena(string_arena, string); value_to_insert = string_ref; } else { - value_to_insert = static_cast(value.get()); + value_to_insert = static_cast(value.safeGet()); } container.back() = value_to_insert; diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index 1736cdff306..9db639a0ca4 100644 --- a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -1,7 +1,6 @@ #include "RedisDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "registerDictionaries.h" #include #include @@ -160,7 +159,7 @@ namespace DB if (isInteger(type)) key << DB::toString(key_columns[i]->get64(row)); else if (isString(type)) - key << (*key_columns[i])[row].get(); + key << (*key_columns[i])[row].safeGet(); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of key in Redis dictionary"); } diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index fd43f31a009..67bae372f87 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -113,7 +113,7 @@ std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr FlattenDiskConfigurationVisitor::Data data{context, attach}; FlattenDiskConfigurationVisitor{data}.visit(ast); - auto disk_name = assert_cast(*ast).value.get(); + auto disk_name = assert_cast(*ast).value.safeGet(); LOG_TRACE(getLogger("getOrCreateDiskFromDiskAST"), "Result disk name: {}", disk_name); return disk_name; } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a7bd398cdaa..5dbafe48c3a 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1196,7 +1196,7 @@ struct ToYearImpl { if (point.getType() != Field::Types::UInt64) return std::nullopt; - auto year = point.get(); + auto year = point.safeGet(); if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; const DateLUTImpl & date_lut = DateLUT::instance("UTC"); @@ -2001,7 +2001,7 @@ struct ToYYYYMMImpl { if (point.getType() != Field::Types::UInt64) return std::nullopt; - auto year_month = point.get(); + auto year_month = point.safeGet(); auto year = year_month / 100; auto month = year_month % 100; diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index 306b6395dc5..210bb69e16d 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -101,9 +101,9 @@ private: BucketsType num_buckets; if (buckets_field.getType() == Field::Types::Int64) - num_buckets = checkBucketsRange(buckets_field.get()); + num_buckets = checkBucketsRange(buckets_field.safeGet()); else if (buckets_field.getType() == Field::Types::UInt64) - num_buckets = checkBucketsRange(buckets_field.get()); + num_buckets = checkBucketsRange(buckets_field.safeGet()); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of the second argument of function {}", diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..0760b929652 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -117,7 +117,7 @@ UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) Field field; named_column.column->get(0, field); - return static_cast(field.get()); + return static_cast(field.safeGet()); } @@ -2604,8 +2604,8 @@ struct ToNumberMonotonicity if (left.isNull() || right.isNull()) return {}; - Float64 left_float = left.get(); - Float64 right_float = right.get(); + Float64 left_float = left.safeGet(); + Float64 right_float = right.safeGet(); if (left_float >= static_cast(std::numeric_limits::min()) && left_float <= static_cast(std::numeric_limits::max()) @@ -2633,11 +2633,11 @@ struct ToNumberMonotonicity const bool left_in_first_half = left.isNull() ? from_is_unsigned - : (left.get() >= 0); + : (left.safeGet() >= 0); const bool right_in_first_half = right.isNull() ? !from_is_unsigned - : (right.get() >= 0); + : (right.safeGet() >= 0); /// Size of type is the same. if (size_of_from == size_of_to) @@ -2675,7 +2675,7 @@ struct ToNumberMonotonicity return {}; /// Function cannot be monotonic when left and right are not on the same ranges. - if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) + if (divideByRangeOfType(left.safeGet()) != divideByRangeOfType(right.safeGet())) return {}; if (to_is_unsigned) @@ -2683,7 +2683,7 @@ struct ToNumberMonotonicity else { // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. - const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); + const bool is_monotonic = (T(left.safeGet()) >= 0) == (T(right.safeGet()) >= 0); return { .is_monotonic = is_monotonic }; } @@ -2707,13 +2707,13 @@ struct ToDateMonotonicity } else if ( ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF))) || (( (left.getType() == Field::Types::Float64 || left.isNull()) && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + && ((left.isNull() || left.safeGet() < 0xFFFF) && (right.isNull() || right.safeGet() >= 0xFFFF)))) || !isNativeNumber(type)) { return {}; @@ -2768,16 +2768,16 @@ struct ToStringMonotonicity if (left.getType() == Field::Types::UInt64 && right.getType() == Field::Types::UInt64) { - return (left.get() == 0 && right.get() == 0) - || (floor(log10(left.get())) == floor(log10(right.get()))) + return (left.safeGet() == 0 && right.safeGet() == 0) + || (floor(log10(left.safeGet())) == floor(log10(right.safeGet()))) ? positive : not_monotonic; } if (left.getType() == Field::Types::Int64 && right.getType() == Field::Types::Int64) { - return (left.get() == 0 && right.get() == 0) - || (left.get() > 0 && right.get() > 0 && floor(log10(left.get())) == floor(log10(right.get()))) + return (left.safeGet() == 0 && right.safeGet() == 0) + || (left.safeGet() > 0 && right.safeGet() > 0 && floor(log10(left.safeGet())) == floor(log10(right.safeGet()))) ? positive : not_monotonic; } @@ -4673,7 +4673,7 @@ private: return [function_name] ( ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) { - using ColumnEnumType = EnumType::ColumnType; + using ColumnEnumType = typename EnumType::ColumnType; const auto & first_col = arguments.front().column.get(); const auto & first_type = arguments.front().type.get(); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index c35df8ba72d..1eaf0d1609a 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -243,7 +243,7 @@ private: } case MoveType::Index: { - Int64 index = (*arguments[j + 1].column)[row].get(); + Int64 index = (*arguments[j + 1].column)[row].safeGet(); if (!moveToElementByIndex(res_element, static_cast(index), key)) return false; break; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 65d7473b945..ff0cff09c9e 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -701,11 +701,11 @@ ColumnPtr FunctionAnyArityLogical::getConstantResultForNonConstArgum bool constant_value_bool = false; if (field_type == Field::Types::Float64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); else if (field_type == Field::Types::Int64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); else if (field_type == Field::Types::UInt64) - constant_value_bool = static_cast(constant_field_value.get()); + constant_value_bool = static_cast(constant_field_value.safeGet()); has_true_constant = has_true_constant || constant_value_bool; has_false_constant = has_false_constant || !constant_value_bool; diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 6b65a5feaec..ed7fe1a5de1 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -500,7 +500,7 @@ inline Scale getScaleArg(const ColumnConst* scale_col) { const auto & scale_field = scale_col->getField(); - Int64 scale64 = scale_field.get(); + Int64 scale64 = scale_field.safeGet(); validateScale(scale64); return scale64; @@ -632,7 +632,7 @@ public: Scale raw_scale = scale64; DecimalRoundingImpl::applyOne(value_col_typed->getElement(i), value_col_typed->getScale(), - reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); + reinterpret_cast::NativeT&>(col_res->getElement(i)), raw_scale); } } } @@ -854,7 +854,7 @@ private: using ValueType = typename Container::value_type; std::vector boundary_values(boundaries.size()); for (size_t i = 0; i < boundaries.size(); ++i) - boundary_values[i] = static_cast(boundaries[i].get()); + boundary_values[i] = static_cast(boundaries[i].safeGet()); ::sort(boundary_values.begin(), boundary_values.end()); boundary_values.erase(std::unique(boundary_values.begin(), boundary_values.end()), boundary_values.end()); diff --git a/src/Functions/IFunctionCustomWeek.h b/src/Functions/IFunctionCustomWeek.h index 51542c9cab1..ba0baa35819 100644 --- a/src/Functions/IFunctionCustomWeek.h +++ b/src/Functions/IFunctionCustomWeek.h @@ -50,15 +50,15 @@ public: if (checkAndGetDataType(&type)) { - return Transform::FactorTransform::execute(UInt16(left.get()), date_lut) - == Transform::FactorTransform::execute(UInt16(right.get()), date_lut) + return Transform::FactorTransform::execute(UInt16(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt16(right.safeGet()), date_lut) ? is_monotonic : is_not_monotonic; } else { - return Transform::FactorTransform::execute(UInt32(left.get()), date_lut) - == Transform::FactorTransform::execute(UInt32(right.get()), date_lut) + return Transform::FactorTransform::execute(UInt32(left.safeGet()), date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), date_lut) ? is_monotonic : is_not_monotonic; } diff --git a/src/Functions/IFunctionDateOrDateTime.h b/src/Functions/IFunctionDateOrDateTime.h index 762b79bfafc..899aa2c305d 100644 --- a/src/Functions/IFunctionDateOrDateTime.h +++ b/src/Functions/IFunctionDateOrDateTime.h @@ -72,30 +72,30 @@ public: if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(UInt16(left.get()), *date_lut) - == Transform::FactorTransform::execute(UInt16(right.get()), *date_lut) + return Transform::FactorTransform::execute(UInt16(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(UInt16(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(Int32(left.get()), *date_lut) - == Transform::FactorTransform::execute(Int32(right.get()), *date_lut) + return Transform::FactorTransform::execute(Int32(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(Int32(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else if (checkAndGetDataType(type_ptr)) { - return Transform::FactorTransform::execute(UInt32(left.get()), *date_lut) - == Transform::FactorTransform::execute(UInt32(right.get()), *date_lut) + return Transform::FactorTransform::execute(UInt32(left.safeGet()), *date_lut) + == Transform::FactorTransform::execute(UInt32(right.safeGet()), *date_lut) ? is_monotonic : is_not_monotonic; } else { assert(checkAndGetDataType(type_ptr)); - const auto & left_date_time = left.get(); + const auto & left_date_time = left.safeGet(); TransformDateTime64 transformer_left(left_date_time.getScale()); - const auto & right_date_time = right.get(); + const auto & right_date_time = right.safeGet(); TransformDateTime64 transformer_right(right_date_time.getScale()); return transformer_left.execute(left_date_time.getValue(), *date_lut) diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp index fb74018b330..84ac0ff08f3 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp @@ -46,7 +46,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { return false; } - range_indices.first = static_cast(number_ptr->as()->value.get()); + range_indices.first = static_cast(number_ptr->as()->value.safeGet()); if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket) { @@ -63,7 +63,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte { return false; } - range_indices.second = static_cast(number_ptr->as()->value.get()); + range_indices.second = static_cast(number_ptr->as()->value.safeGet()); } else { diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index 3e9c8fba215..c1a2fb6be2d 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -75,7 +75,7 @@ struct MultiMatchAllIndicesImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 20b2150048b..ce6a054c064 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -89,7 +89,7 @@ struct MultiMatchAnyImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length); diff --git a/src/Functions/MultiSearchAllPositionsImpl.h b/src/Functions/MultiSearchAllPositionsImpl.h index cfe60e51bcd..6c2cd215638 100644 --- a/src/Functions/MultiSearchAllPositionsImpl.h +++ b/src/Functions/MultiSearchAllPositionsImpl.h @@ -33,7 +33,7 @@ struct MultiSearchAllPositionsImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { diff --git a/src/Functions/MultiSearchFirstIndexImpl.h b/src/Functions/MultiSearchFirstIndexImpl.h index 36a5fd514d9..3c6ec8ead44 100644 --- a/src/Functions/MultiSearchFirstIndexImpl.h +++ b/src/Functions/MultiSearchFirstIndexImpl.h @@ -44,7 +44,7 @@ struct MultiSearchFirstIndexImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto searcher = Impl::createMultiSearcherInBigHaystack(needles); diff --git a/src/Functions/MultiSearchFirstPositionImpl.h b/src/Functions/MultiSearchFirstPositionImpl.h index ccdd82a0ee5..b5c68ad664d 100644 --- a/src/Functions/MultiSearchFirstPositionImpl.h +++ b/src/Functions/MultiSearchFirstPositionImpl.h @@ -44,7 +44,7 @@ struct MultiSearchFirstPositionImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 { diff --git a/src/Functions/MultiSearchImpl.h b/src/Functions/MultiSearchImpl.h index 467cc96a95f..3eb8e6fb627 100644 --- a/src/Functions/MultiSearchImpl.h +++ b/src/Functions/MultiSearchImpl.h @@ -44,7 +44,7 @@ struct MultiSearchImpl std::vector needles; needles.reserve(needles_arr.size()); for (const auto & needle : needles_arr) - needles.emplace_back(needle.get()); + needles.emplace_back(needle.safeGet()); auto searcher = Impl::createMultiSearcherInBigHaystack(needles); diff --git a/src/Functions/URL/cutURLParameter.cpp b/src/Functions/URL/cutURLParameter.cpp index 3ab9cad1ea7..4439e79e962 100644 --- a/src/Functions/URL/cutURLParameter.cpp +++ b/src/Functions/URL/cutURLParameter.cpp @@ -156,7 +156,7 @@ public: for (size_t j = 0; j < num_needles; ++j) { auto field = col_needle_const_array->getData()[j]; - cutURL(res_data, field.get(), res_offset, cur_res_offset); + cutURL(res_data, field.safeGet(), res_offset, cur_res_offset); } } else diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 227b29d5d9f..81f3f97979b 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -904,10 +904,10 @@ ColumnPtr FunctionArrayElement::executeNumberConst( return nullptr; if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), index.get() - 1, col_res_vec->getData(), builder); + col_nested->getData(), col_array->getOffsets(), index.safeGet() - 1, col_res_vec->getData(), builder); } else if (index.getType() == Field::Types::Int64) { @@ -972,14 +972,14 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument auto col_res = ColumnString::create(); if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (builder) ArrayElementStringImpl::vectorConst( col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - index.get() - 1, + index.safeGet() - 1, col_res->getChars(), col_res->getOffsets(), builder); @@ -988,7 +988,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - index.get() - 1, + index.safeGet() - 1, col_res->getChars(), col_res->getOffsets(), builder); @@ -1000,7 +1000,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -1009,7 +1009,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -1046,7 +1046,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( auto res_offsets = ColumnArray::ColumnOffsets::create(); auto res_string_null_map = col_nullable ? ColumnUInt8::create() : nullptr; if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (col_nullable) ArrayElementArrayStringImpl::vectorConst( @@ -1055,7 +1055,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), &string_null_map->getData(), - index.get() - 1, + index.safeGet() - 1, res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1068,7 +1068,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), nullptr, - index.get() - 1, + index.safeGet() - 1, res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1084,7 +1084,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), &string_null_map->getData(), - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1097,7 +1097,7 @@ ColumnPtr FunctionArrayElement::executeArrayStringConst( col_nested_array->getOffsets(), col_nested_elem->getOffsets(), nullptr, - -(UInt64(index.get()) + 1), + -(UInt64(index.safeGet()) + 1), res_string->getChars(), res_offsets->getData(), res_string->getOffsets(), @@ -1153,7 +1153,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( auto & res_offsets = res_array->getOffsets(); NullMap * res_null_map = res_nullable ? &res_nullable->getNullMapData() : nullptr; - if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + if (index.getType() == Field::Types::UInt64 || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) { if (col_nullable) ArrayElementArrayNumImpl::template vectorConst( @@ -1161,7 +1161,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( col_array->getOffsets(), col_nested_array->getOffsets(), null_map, - index.get() - 1, + index.safeGet() - 1, res_data->getData(), res_offsets, res_null_map, @@ -1172,7 +1172,7 @@ ColumnPtr FunctionArrayElement::executeArrayNumberConst( col_array->getOffsets(), col_nested_array->getOffsets(), null_map, - index.get() - 1, + index.safeGet() - 1, res_data->getData(), res_offsets, res_null_map, @@ -1392,12 +1392,12 @@ ColumnPtr FunctionArrayElement::executeGenericConst( auto col_res = col_nested.cloneEmpty(); if (index.getType() == Field::Types::UInt64 - || (index.getType() == Field::Types::Int64 && index.get() >= 0)) + || (index.getType() == Field::Types::Int64 && index.safeGet() >= 0)) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), index.get() - 1, *col_res, builder); + col_nested, col_array->getOffsets(), index.safeGet() - 1, *col_res, builder); else if (index.getType() == Field::Types::Int64) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), -(static_cast(index.get() + 1)), *col_res, builder); + col_nested, col_array->getOffsets(), -(static_cast(index.safeGet() + 1)), *col_res, builder); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); @@ -1789,7 +1789,7 @@ bool FunctionArrayElement::matchKeyToIndexStringConst( using DataColumn = std::decay_t; if (index.getType() != Field::Types::String) return false; - MatcherStringConst matcher{data_column, index.get()}; + MatcherStringConst matcher{data_column, index.safeGet()}; executeMatchKeyToIndex(offsets, matched_idxs, matcher); return true; }); diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 86797cb5db0..614b01c2ac8 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -237,7 +237,7 @@ private: } arg.val_column->get(offset + j, temp_val); - ValType value = temp_val.get(); + ValType value = temp_val.safeGet(); if constexpr (op_type == OpTypes::ADD) { diff --git a/src/Functions/getClientHTTPHeader.cpp b/src/Functions/getClientHTTPHeader.cpp index 140f39d03b8..50a6275fc82 100644 --- a/src/Functions/getClientHTTPHeader.cpp +++ b/src/Functions/getClientHTTPHeader.cpp @@ -58,7 +58,7 @@ public: { Field header; source->get(row, header); - if (auto it = client_info.http_headers.find(header.get()); it != client_info.http_headers.end()) + if (auto it = client_info.http_headers.find(header.safeGet()); it != client_info.http_headers.end()) result->insert(it->second); else result->insertDefault(); diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index c4b675fcf6c..14b8b70b22c 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -200,7 +200,7 @@ public: if (value.isNull()) continue; - if (value.get() == 0) + if (value.safeGet() == 0) continue; instruction.condition_always_true = true; diff --git a/src/Functions/nested.cpp b/src/Functions/nested.cpp index bdaf57d65c9..85c342b5e7c 100644 --- a/src/Functions/nested.cpp +++ b/src/Functions/nested.cpp @@ -145,7 +145,7 @@ private: if (nested_names_field.getType() != Field::Types::Array) return {}; - const auto & nested_names_array = nested_names_field.get(); + const auto & nested_names_array = nested_names_field.safeGet(); Names nested_names; nested_names.reserve(nested_names_array.size()); @@ -155,7 +155,7 @@ private: if (nested_name_field.getType() != Field::Types::String) return {}; - nested_names.push_back(nested_name_field.get()); + nested_names.push_back(nested_name_field.safeGet()); } return nested_names; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index a794cdbcf05..59040bf1fea 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -294,21 +294,21 @@ void RequestSettings::finishInit(const DB::Settings & settings, bool validate_se /// to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. /// But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").get() : settings.s3_max_get_rps) + if (UInt64 max_get_rps = isChanged("max_get_rps") ? get("max_get_rps").safeGet() : settings.s3_max_get_rps) { size_t default_max_get_burst = settings.s3_max_get_burst ? settings.s3_max_get_burst : (Throttler::default_burst_seconds * max_get_rps); - size_t max_get_burst = isChanged("max_get_burts") ? get("max_get_burst").get() : default_max_get_burst; + size_t max_get_burst = isChanged("max_get_burts") ? get("max_get_burst").safeGet() : default_max_get_burst; get_request_throttler = std::make_shared(max_get_rps, max_get_burst); } - if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").get() : settings.s3_max_put_rps) + if (UInt64 max_put_rps = isChanged("max_put_rps") ? get("max_put_rps").safeGet() : settings.s3_max_put_rps) { size_t default_max_put_burst = settings.s3_max_put_burst ? settings.s3_max_put_burst : (Throttler::default_burst_seconds * max_put_rps); - size_t max_put_burst = isChanged("max_put_burts") ? get("max_put_burst").get() : default_max_put_burst; + size_t max_put_burst = isChanged("max_put_burts") ? get("max_put_burst").safeGet() : default_max_put_burst; put_request_throttler = std::make_shared(max_put_rps, max_put_burst); } } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index e1b7e92ee5d..368eb8174f0 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -131,7 +131,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Invalid type in set. Expected tuple, got {}", String(value.getTypeName())); - const auto & tuple = value.template get(); + const auto & tuple = value.template safeGet(); size_t tuple_size = tuple.size(); if (tuple_size != columns_num) throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", @@ -233,7 +233,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co "Invalid type of set. Expected tuple, got {}", function_result.getTypeName()); - tuple = &function_result.get(); + tuple = &function_result.safeGet(); } /// Tuple can be represented as a literal in AST. @@ -246,7 +246,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co "Invalid type in set. Expected tuple, got {}", literal->value.getTypeName()); - tuple = &literal->value.get(); + tuple = &literal->value.safeGet(); } assert(tuple || func); @@ -332,14 +332,14 @@ Block createBlockForSet( if (type_index == TypeIndex::Tuple) { const DataTypes & value_types = assert_cast(right_arg_type.get())->getElements(); - block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + block = createBlockFromCollection(right_arg_value.safeGet(), value_types, set_element_types, tranform_null_in); } else if (type_index == TypeIndex::Array) { const auto* right_arg_array_type = assert_cast(right_arg_type.get()); - size_t right_arg_array_size = right_arg_value.get().size(); + size_t right_arg_array_size = right_arg_value.safeGet().size(); DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType()); - block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); + block = createBlockFromCollection(right_arg_value.safeGet(), value_types, set_element_types, tranform_null_in); } else throw_unsupported_type(right_arg_type); diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 356bffa75e9..302a5e55c53 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -201,7 +201,7 @@ private: if (literal_value.getType() != Field::Types::String) continue; - auto dictionary_name = literal_value.get(); + auto dictionary_name = literal_value.safeGet(); auto qualified_dictionary_name = context->getExternalDictionariesLoader().qualifyDictionaryNameWithDatabase(dictionary_name, context); literal_value = qualified_dictionary_name.getFullName(); } diff --git a/src/Interpreters/ComparisonTupleEliminationVisitor.cpp b/src/Interpreters/ComparisonTupleEliminationVisitor.cpp index 4f06f345b96..b9f7f37b338 100644 --- a/src/Interpreters/ComparisonTupleEliminationVisitor.cpp +++ b/src/Interpreters/ComparisonTupleEliminationVisitor.cpp @@ -22,7 +22,7 @@ ASTs splitTuple(const ASTPtr & node) if (const auto * literal = node->as(); literal && literal->value.getType() == Field::Types::Tuple) { ASTs result; - const auto & tuple = literal->value.get(); + const auto & tuple = literal->value.safeGet(); for (const auto & child : tuple) result.emplace_back(std::make_shared(child)); return result; diff --git a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp index 084bb0a1bb9..220355e0741 100644 --- a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp +++ b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp @@ -45,7 +45,7 @@ void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) if (!identifier || !literal || literal->value.getType() != Field::Types::String) continue; - String regexp = likePatternToRegexp(literal->value.get()); + String regexp = likePatternToRegexp(literal->value.safeGet()); /// Case insensitive. Works with UTF-8 as well. if (is_ilike) regexp = "(?i)" + regexp; @@ -61,7 +61,7 @@ void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) match->arguments->children.push_back(it->second); unique_elems.push_back(std::move(match)); } - it->second->value.get().push_back(regexp); + it->second->value.safeGet().push_back(regexp); } } diff --git a/src/Interpreters/ConvertStringsToEnumVisitor.cpp b/src/Interpreters/ConvertStringsToEnumVisitor.cpp index 7cc95dc521b..d35baa92900 100644 --- a/src/Interpreters/ConvertStringsToEnumVisitor.cpp +++ b/src/Interpreters/ConvertStringsToEnumVisitor.cpp @@ -33,8 +33,8 @@ String makeStringsEnum(const std::set & values) void changeIfArguments(ASTPtr & first, ASTPtr & second) { - String first_value = first->as()->value.get(); - String second_value = second->as()->value.get(); + String first_value = first->as()->value.safeGet(); + String second_value = second->as()->value.safeGet(); std::set values; values.insert(first_value); @@ -59,9 +59,9 @@ void changeTransformArguments(ASTPtr & array_to, ASTPtr & other) { std::set values; - for (const auto & item : array_to->as()->value.get()) - values.insert(item.get()); - values.insert(other->as()->value.get()); + for (const auto & item : array_to->as()->value.safeGet()) + values.insert(item.safeGet()); + values.insert(other->as()->value.safeGet()); String enum_string = makeStringsEnum(values); @@ -168,7 +168,7 @@ void ConvertStringsToEnumMatcher::visit(ASTFunction & function_node, Data & data if (literal_to->value.getTypeName() != "Array" || literal_other->value.getTypeName() != "String") return; - Array array_to = literal_to->value.get(); + Array array_to = literal_to->value.safeGet(); if (array_to.empty()) return; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index fa197d59c13..6e08dd5e2cc 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -538,7 +538,7 @@ void DatabaseReplicatedTask::createSyncedNodeIfNeed(const ZooKeeperPtr & zookeep /// Bool type is really weird, sometimes it's Bool and sometimes it's UInt64... assert(value.getType() == Field::Types::Bool || value.getType() == Field::Types::UInt64); - if (!value.get()) + if (!value.safeGet()) return; zookeeper->createIfNotExists(getSyncedNodePath(), ""); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 971f90bd3cd..15c6aba8b62 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -689,7 +689,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( throw Exception(ErrorCodes::LOGICAL_ERROR, "Neither default value expression nor type is provided for a column"); if (col_decl.comment) - column.comment = col_decl.comment->as().value.get(); + column.comment = col_decl.comment->as().value.safeGet(); if (col_decl.codec) { @@ -1875,7 +1875,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont if (has_explicit_zk_path_arg) { - String zk_path = create.storage->engine->arguments->children[0]->as()->value.get(); + String zk_path = create.storage->engine->arguments->children[0]->as()->value.safeGet(); Macros::MacroExpansionInfo info; info.table_id.uuid = create.uuid; info.ignore_unknown = true; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index bedd9cb4a80..c820f999e0c 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -332,7 +332,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) if (settings.hasBooleanSetting(change.name)) { - auto value = change.value.get(); + auto value = change.value.safeGet(); if (value > 1) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value {} for setting \"{}\". " "Expected boolean type", value, change.name); @@ -341,7 +341,7 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) } else { - auto value = change.value.get(); + auto value = change.value.safeGet(); settings.setIntegerSetting(change.name, value); } } diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 7eb487ba7b3..2c579f3b468 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -334,7 +334,7 @@ BlockIO InterpreterKillQueryQuery::execute() for (size_t i = 0; i < moves_block.rows(); ++i) { table_id = StorageID{database_col.getDataAt(i).toString(), table_col.getDataAt(i).toString()}; - auto task_uuid = task_uuid_col[i].get(); + auto task_uuid = task_uuid_col[i].safeGet(); CancellationCode code = CancellationCode::Unknown; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cb42a8abf9c..0c79f4310ce 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1232,7 +1232,7 @@ SortDescription InterpreterSelectQuery::getSortDescription(const ASTSelectQuery std::shared_ptr collator; if (order_by_elem.getCollation()) - collator = std::make_shared(order_by_elem.getCollation()->as().value.get()); + collator = std::make_shared(order_by_elem.getCollation()->as().value.safeGet()); if (order_by_elem.with_fill) { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 57ad5caa4c7..dfffeb437d4 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -220,7 +220,7 @@ bool isStorageTouchedByMutations( Block tmp_block; while (executor.pull(tmp_block)); - auto count = (*block.getByName("count()").column)[0].get(); + auto count = (*block.getByName("count()").column)[0].safeGet(); return count != 0; } diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index dd205ae6508..913f9900b77 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -42,13 +42,13 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy if (isDateOrDate32(column.type.get())) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time = date_lut.dateToString(range.first.safeGet()); + end_date_or_date_time = date_lut.dateToString(range.second.safeGet()); } else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); + start_date_or_date_time = date_lut.timeToString(range.first.safeGet()); + end_date_or_date_time = date_lut.timeToString(range.second.safeGet()); } else [[unlikely]] return {}; diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 48c9988b6fc..e9a663d53b0 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -24,7 +24,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v if (literal->value.getType() == Field::Types::Int64 || literal->value.getType() == Field::Types::UInt64) { - value = literal->value.get(); + value = literal->value.safeGet(); return true; } if (literal->value.getType() == Field::Types::Null) @@ -51,7 +51,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v { if (type_literal->value.getType() == Field::Types::String) { - const auto & type_str = type_literal->value.get(); + const auto & type_str = type_literal->value.safeGet(); if (type_str == "UInt8" || type_str == "Nullable(UInt8)") return tryExtractConstValueFromCondition(expr_list->children.at(0), value); } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 54515ea072a..6a0522b0676 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -72,7 +72,7 @@ bool shardContains( if (sharding_value.isNull()) return false; - UInt64 value = sharding_value.get(); + UInt64 value = sharding_value.safeGet(); const auto shard_num = data.slots[value % data.slots.size()] + 1; return data.shard_info.shard_num == shard_num; } @@ -120,11 +120,20 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d else if (auto * tuple_literal = right->as(); tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple) { - auto & tuple = tuple_literal->value.get(); - std::erase_if(tuple, [&](auto & child) + auto & tuple = tuple_literal->value.safeGet(); + if (tuple.size() > 1) { - return tuple.size() > 1 && !shardContains(child, name, data); - }); + Tuple new_tuple; + + for (const auto & child : tuple) + if (shardContains(child, name, data)) + new_tuple.emplace_back(std::move(child)); + + if (new_tuple.size() == 0) + new_tuple.emplace_back(std::move(tuple.back())); + + tuple_literal->value = std::move(new_tuple); + } } } @@ -159,7 +168,7 @@ public: { if (isTuple(constant->getResultType())) { - const auto & tuple = constant->getValue().get(); + const auto & tuple = constant->getValue().safeGet(); Tuple new_tuple; new_tuple.reserve(tuple.size()); diff --git a/src/Interpreters/RewriteCountVariantsVisitor.cpp b/src/Interpreters/RewriteCountVariantsVisitor.cpp index 4a541c3765a..272e1ac735f 100644 --- a/src/Interpreters/RewriteCountVariantsVisitor.cpp +++ b/src/Interpreters/RewriteCountVariantsVisitor.cpp @@ -53,7 +53,7 @@ void RewriteCountVariantsVisitor::visit(ASTFunction & func) { if (first_arg_literal->value.getType() == Field::Types::UInt64) { - auto constant = first_arg_literal->value.get(); + auto constant = first_arg_literal->value.safeGet(); if (constant == 1 && !context->getSettingsRef().aggregate_functions_null_for_empty) transform = true; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b872eb94fde..6483dd3be48 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -184,7 +184,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) const auto & value = group_exprs[i]->as()->value; if (value.getType() == Field::Types::UInt64) { - auto pos = value.get(); + auto pos = value.safeGet(); if (pos > 0 && pos <= select_query->select()->children.size()) keep_position = true; } diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 31a881001e3..b1e12ff8048 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -94,8 +94,8 @@ void WindowFrame::checkValid() const if (begin_type == BoundaryType::Offset && !((begin_offset.getType() == Field::Types::UInt64 || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) + && begin_offset.safeGet() >= 0 + && begin_offset.safeGet() < INT_MAX)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", @@ -107,8 +107,8 @@ void WindowFrame::checkValid() const if (end_type == BoundaryType::Offset && !((end_offset.getType() == Field::Types::UInt64 || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) + && end_offset.safeGet() >= 0 + && end_offset.safeGet() < INT_MAX)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 1a7c166c6a5..f3957c3b69b 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -57,7 +57,7 @@ template Field convertNumericTypeImpl(const Field & from) { To result; - if (!accurate::convertNumeric(from.get(), result)) + if (!accurate::convertNumeric(from.safeGet(), result)) return {}; return result; } @@ -88,7 +88,7 @@ Field convertNumericType(const Field & from, const IDataType & type) template Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & type) { - From value = from.get(); + From value = from.safeGet(); if (!type.canStoreWhole(value)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); @@ -100,7 +100,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ template Field convertStringToDecimalType(const Field & from, const DataTypeDecimal & type) { - const String & str_value = from.get(); + const String & str_value = from.safeGet(); T value = type.parseFromString(str_value); return DecimalField(value, type.getScale()); } @@ -108,7 +108,7 @@ Field convertStringToDecimalType(const Field & from, const DataTypeDecimal & template Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & type) { - auto field = from.get>(); + auto field = from.safeGet>(); T value = convertDecimals, DataTypeDecimal>(field.getValue(), field.getScale(), type.getScale()); return DecimalField(value, type.getScale()); } @@ -116,7 +116,7 @@ Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal & template Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & type) { - From value = from.get(); + From value = from.safeGet(); if (!type.canStoreWhole(value)) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); @@ -182,24 +182,24 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Conversion between Date and DateTime and vice versa. if (which_type.isDate() && which_from_type.isDateTime()) { - return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.safeGet()).toUnderType()); } else if (which_type.isDate32() && which_from_type.isDateTime()) { - return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.get()).toUnderType()); + return static_cast(static_cast(*from_type_hint).getTimeZone().toDayNum(src.safeGet()).toUnderType()); } else if (which_type.isDateTime() && which_from_type.isDate()) { - return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.safeGet())); } else if (which_type.isDateTime() && which_from_type.isDate32()) { - return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); + return static_cast(type).getTimeZone().fromDayNum(DayNum(src.safeGet())); } else if (which_type.isDateTime64() && which_from_type.isDate()) { const auto & date_time64_type = static_cast(type); - const auto value = date_time64_type.getTimeZone().fromDayNum(DayNum(src.get())); + const auto value = date_time64_type.getTimeZone().fromDayNum(DayNum(src.safeGet())); return DecimalField( DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, date_time64_type.getScaleMultiplier()), date_time64_type.getScale()); @@ -207,7 +207,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (which_type.isDateTime64() && which_from_type.isDate32()) { const auto & date_time64_type = static_cast(type); - const auto value = date_time64_type.getTimeZone().fromDayNum(ExtendedDayNum(static_cast(src.get()))); + const auto value = date_time64_type.getTimeZone().fromDayNum(ExtendedDayNum(static_cast(src.safeGet()))); return DecimalField( DecimalUtils::decimalFromComponentsWithMultiplier(value, 0, date_time64_type.getScaleMultiplier()), date_time64_type.getScale()); @@ -253,7 +253,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) { - const auto & from_type = src.get(); + const auto & from_type = src.safeGet(); const auto & to_type = static_cast(type); const auto scale_from = from_type.getScale(); @@ -318,7 +318,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_from_type.isFixedString() && assert_cast(from_type_hint)->getN() == IPV6_BINARY_LENGTH) { const auto col = type.createColumn(); - ReadBufferFromString in_buffer(src.get()); + ReadBufferFromString in_buffer(src.safeGet()); type.getDefaultSerialization()->deserializeBinary(*col, in_buffer, {}); return (*col)[0]; } @@ -330,7 +330,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (which_type.isFixedString()) { size_t n = assert_cast(type).getN(); - const auto & src_str = src.get(); + const auto & src_str = src.safeGet(); if (src_str.size() < n) { String src_str_extended = src_str; @@ -347,7 +347,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Array) { - const Array & src_arr = src.get(); + const Array & src_arr = src.safeGet(); size_t src_arr_size = src_arr.size(); const auto & element_type = *(type_array->getNestedType()); @@ -370,7 +370,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { if (src.getType() == Field::Types::Tuple) { - const auto & src_tuple = src.get(); + const auto & src_tuple = src.safeGet(); size_t src_tuple_size = src_tuple.size(); size_t dst_tuple_size = type_tuple->getElements().size(); @@ -415,7 +415,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID const auto & key_type = *type_map->getKeyType(); const auto & value_type = *type_map->getValueType(); - const auto & map = src.get(); + const auto & map = src.safeGet(); size_t map_size = map.size(); Map res(map_size); @@ -424,7 +424,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID for (size_t i = 0; i < map_size; ++i) { - const auto & map_entry = map[i].get(); + const auto & map_entry = map[i].safeGet(); const auto & key = map_entry[0]; const auto & value = map_entry[1]; @@ -453,7 +453,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID "Cannot convert {} to {}", src.getTypeName(), agg_func_type->getName()); - const auto & name = src.get().name; + const auto & name = src.safeGet().name; if (agg_func_type->getName() != name) throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert {} to {}", name, agg_func_type->getName()); @@ -468,7 +468,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames()) { const auto & names = from_type_tuple->getElementNames(); - const auto & tuple = src.get(); + const auto & tuple = src.safeGet(); if (names.size() != tuple.size()) throw Exception(ErrorCodes::TYPE_MISMATCH, @@ -485,10 +485,10 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (src.getType() == Field::Types::Map) { Object object; - const auto & map = src.get(); + const auto & map = src.safeGet(); for (const auto & element : map) { - const auto & map_entry = element.get(); + const auto & map_entry = element.safeGet(); const auto & key = map_entry[0]; const auto & value = map_entry[1]; @@ -496,7 +496,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert from Map with key of type {} to Object", key.getTypeName()); - object[key.get()] = value; + object[key.safeGet()] = value; } return object; @@ -537,7 +537,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } const auto col = type_to_parse->createColumn(); - ReadBufferFromString in_buffer(src.get()); + ReadBufferFromString in_buffer(src.safeGet()); try { type_to_parse->getDefaultSerialization()->deserializeWholeText(*col, in_buffer, FormatSettings{}); @@ -545,9 +545,9 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID catch (Exception & e) { if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.get(), type.getName()); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string '{}' to type {}", src.safeGet(), type.getName()); - e.addMessage(fmt::format("while converting '{}' to {}", src.get(), type.getName())); + e.addMessage(fmt::format("while converting '{}' to {}", src.safeGet(), type.getName())); throw; } @@ -610,7 +610,7 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t template static bool decimalEqualsFloat(Field field, Float64 float_value) { - auto decimal_field = field.get>(); + auto decimal_field = field.safeGet>(); auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); return decimal_to_float == float_value; } @@ -629,13 +629,13 @@ std::optional convertFieldToTypeStrict(const Field & from_value, const ID { /// Convert back to Float64 and compare if (result_value.getType() == Field::Types::Decimal32) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal64) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal128) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; if (result_value.getType() == Field::Types::Decimal256) - return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + return decimalEqualsFloat(result_value, from_value.safeGet()) ? result_value : std::optional{}; throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 4bfc80af1fe..d4bb0cc2f8a 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -297,7 +297,7 @@ namespace { if (tuple_literal->value.getType() == Field::Types::Tuple) { - const auto & tuple = tuple_literal->value.get(); + const auto & tuple = tuple_literal->value.safeGet(); for (const auto & child : tuple) { const auto dnf = analyzeEquals(identifier, child, expr); @@ -792,7 +792,7 @@ std::optional evaluateExpressionOverConstantCondition(const ASTPtr & nod else if (const auto * literal = node->as()) { // Check if it's always true or false. - if (literal->value.getType() == Field::Types::UInt64 && literal->value.get() == 0) + if (literal->value.getType() == Field::Types::UInt64 && literal->value.safeGet() == 0) return {result}; else return {}; diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index 3d60723a167..ee967f45c74 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -35,11 +35,11 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel if (which == Field::Types::UInt64) { - pos = ast_literal->value.get(); + pos = ast_literal->value.safeGet(); } else if (which == Field::Types::Int64) { - auto value = ast_literal->value.get(); + auto value = ast_literal->value.safeGet(); if (value > 0) pos = value; else diff --git a/src/Interpreters/tests/gtest_comparison_graph.cpp b/src/Interpreters/tests/gtest_comparison_graph.cpp index ac24a8de368..5f93bb983c1 100644 --- a/src/Interpreters/tests/gtest_comparison_graph.cpp +++ b/src/Interpreters/tests/gtest_comparison_graph.cpp @@ -29,7 +29,7 @@ TEST(ComparisonGraph, Bounds) const auto & [lower, strict] = *res; - ASSERT_EQ(lower.get(), 3); + ASSERT_EQ(lower.safeGet(), 3); ASSERT_TRUE(strict); } @@ -39,7 +39,7 @@ TEST(ComparisonGraph, Bounds) const auto & [upper, strict] = *res; - ASSERT_EQ(upper.get(), 7); + ASSERT_EQ(upper.safeGet(), 7); ASSERT_TRUE(strict); } diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index cd9e910d45a..d42728addb7 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -522,7 +522,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (tuple_arguments_valid && lit_right) { if (isInt64OrUInt64FieldType(lit_right->value.getType()) - && lit_right->value.get() >= 0) + && lit_right->value.safeGet() >= 0) { if (frame.need_parens) settings.ostr << '('; diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 8dedc5dc95d..515f4f0cb9f 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -73,8 +73,8 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const /// Special case for very large arrays and tuples. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing) - || (type == Field::Types::Tuple && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.safeGet().size() > min_elements_for_hashing) + || (type == Field::Types::Tuple && value.safeGet().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); @@ -92,7 +92,7 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const /// for tons of literals as it creates temporary String. if (value.getType() == Field::Types::String) { - writeQuoted(value.get(), ostr); + writeQuoted(value.safeGet(), ostr); } else { @@ -110,7 +110,7 @@ void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const /// Special case for very large arrays. Instead of listing all elements, will use hash of them. /// (Otherwise column name will be too long, that will lead to significant slowdown of expression analysis.) auto type = value.getType(); - if ((type == Field::Types::Array && value.get().size() > min_elements_for_hashing)) + if ((type == Field::Types::Array && value.safeGet().size() > min_elements_for_hashing)) { SipHash hash; applyVisitor(FieldVisitorHash(hash), value); diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index ddfdbe38903..ddf4e9ecda5 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -114,7 +114,7 @@ namespace T fieldToNumber(const Field & f) { if (f.getType() == Field::Types::String) - return parseWithSizeSuffix(boost::algorithm::trim_copy(f.get())); + return parseWithSizeSuffix(boost::algorithm::trim_copy(f.safeGet())); else return applyVisitor(FieldVisitorConvertToNumber(), f); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 9927acdcf17..de395d120d7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -314,7 +314,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (parts.size() == 1) node = std::make_shared(parts[0], std::move(params)); @@ -1626,7 +1626,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (!parser_string_literal.parse(pos, ast_prefix_name, expected)) return false; - column_name_prefix = ast_prefix_name->as().value.get(); + column_name_prefix = ast_prefix_name->as().value.safeGet(); } if (with_open_round_bracket) @@ -1689,7 +1689,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e auto res = std::make_shared(); if (regexp_node) - res->setPattern(regexp_node->as().value.get()); + res->setPattern(regexp_node->as().value.safeGet()); else res->children = std::move(identifiers); res->is_strict = is_strict; @@ -1861,7 +1861,7 @@ static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected else { auto regexp_matcher = std::make_shared(); - regexp_matcher->setPattern(regexp_node->as().value.get()); + regexp_matcher->setPattern(regexp_node->as().value.safeGet()); if (!transformers->children.empty()) { @@ -2310,7 +2310,7 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - destination_name = ast_space_name->as().value.get(); + destination_name = ast_space_name->as().value.safeGet(); } else if (mode == TTLMode::GROUP_BY) { diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index dbefb0cb966..73fd563faf6 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -517,7 +517,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); + command->move_destination_name = ast_space_name->as().value.safeGet(); } else if (s_move_partition.ignore(pos, expected)) { @@ -545,7 +545,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_space_name, expected)) return false; - command->move_destination_name = ast_space_name->as().value.get(); + command->move_destination_name = ast_space_name->as().value.safeGet(); } } else if (s_add_constraint.ignore(pos, expected)) @@ -638,7 +638,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_from, expected)) return false; - command->from = ast_from->as().value.get(); + command->from = ast_from->as().value.safeGet(); command->type = ASTAlterCommand::FETCH_PARTITION; } else if (s_fetch_part.ignore(pos, expected)) @@ -652,7 +652,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ASTPtr ast_from; if (!parser_string_literal.parse(pos, ast_from, expected)) return false; - command->from = ast_from->as().value.get(); + command->from = ast_from->as().value.safeGet(); command->part = true; command->type = ASTAlterCommand::FETCH_PARTITION; } @@ -680,7 +680,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_with_name, expected)) return false; - command->with_name = ast_with_name->as().value.get(); + command->with_name = ast_with_name->as().value.safeGet(); } } else if (s_unfreeze.ignore(pos, expected)) @@ -707,7 +707,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_string_literal.parse(pos, ast_with_name, expected)) return false; - command->with_name = ast_with_name->as().value.get(); + command->with_name = ast_with_name->as().value.safeGet(); } else { diff --git a/src/Parsers/ParserCheckQuery.cpp b/src/Parsers/ParserCheckQuery.cpp index 42716ba7f2c..33b6a5a1ac2 100644 --- a/src/Parsers/ParserCheckQuery.cpp +++ b/src/Parsers/ParserCheckQuery.cpp @@ -55,7 +55,7 @@ bool ParserCheckQuery::parseCheckTable(Pos & pos, ASTPtr & node, Expected & expe const auto * ast_literal = ast_part_name->as(); if (!ast_literal || ast_literal->value.getType() != Field::Types::String) return false; - query->part_name = ast_literal->value.get(); + query->part_name = ast_literal->value.safeGet(); } if (query->database) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index a592975613b..3621a695272 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -922,7 +922,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->is_create_empty = is_create_empty; if (from_path) - query->attach_from_path = from_path->as().value.get(); + query->attach_from_path = from_path->as().value.safeGet(); return true; } @@ -1431,7 +1431,7 @@ bool ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (s_on.ignore(pos, expected)) diff --git a/src/Parsers/ParserDictionary.cpp b/src/Parsers/ParserDictionary.cpp index 83a006231d9..ce38d1b54d1 100644 --- a/src/Parsers/ParserDictionary.cpp +++ b/src/Parsers/ParserDictionary.cpp @@ -33,7 +33,7 @@ bool ParserDictionaryLifetime::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (literal.value.getType() != Field::Types::UInt64) return false; - res->max_sec = literal.value.get(); + res->max_sec = literal.value.safeGet(); node = res; return true; } @@ -58,10 +58,10 @@ bool ParserDictionaryLifetime::parseImpl(Pos & pos, ASTPtr & node, Expected & ex return false; if (pair.first == "min") - res->min_sec = literal->value.get(); + res->min_sec = literal->value.safeGet(); else if (pair.first == "max") { - res->max_sec = literal->value.get(); + res->max_sec = literal->value.safeGet(); initialized_max = true; } else diff --git a/src/Parsers/ParserPartition.cpp b/src/Parsers/ParserPartition.cpp index 80a28f4803e..ab97b3d0e3b 100644 --- a/src/Parsers/ParserPartition.cpp +++ b/src/Parsers/ParserPartition.cpp @@ -65,7 +65,7 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (literal_ast->value.getType() == Field::Types::Tuple) { - fields_count = literal_ast->value.get().size(); + fields_count = literal_ast->value.safeGet().size(); } else { diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 0545c3e5568..81b64ab47c6 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -445,7 +445,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (!ParserStringLiteral{}.parse(pos, ast, expected)) return false; - String time_str = ast->as().value.get(); + String time_str = ast->as().value.safeGet(); ReadBufferFromString buf(time_str); time_t time; readDateTimeText(time, buf); @@ -467,7 +467,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; } - res->seconds = seconds->as()->value.get(); + res->seconds = seconds->as()->value.safeGet(); break; } case Type::DROP_FILESYSTEM_CACHE: @@ -538,7 +538,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (ParserKeyword{Keyword::WITH_NAME}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected)) { - res->backup_name = ast->as().value.get(); + res->backup_name = ast->as().value.safeGet(); } else { @@ -577,7 +577,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & if (!ParserStringLiteral{}.parse(pos, ast, expected)) return false; - custom_name = ast->as().value.get(); + custom_name = ast->as().value.safeGet(); } return true; diff --git a/src/Parsers/ParserUndropQuery.cpp b/src/Parsers/ParserUndropQuery.cpp index 07ca8a3b5fd..57da47df70d 100644 --- a/src/Parsers/ParserUndropQuery.cpp +++ b/src/Parsers/ParserUndropQuery.cpp @@ -41,7 +41,7 @@ bool parseUndropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected) ASTPtr ast_uuid; if (!uuid_p.parse(pos, ast_uuid, expected)) return false; - uuid = parseFromString(ast_uuid->as()->value.get()); + uuid = parseFromString(ast_uuid->as()->value.safeGet()); } if (ParserKeyword{Keyword::ON}.ignore(pos, expected)) { diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index a1ba46125a7..af3591750a1 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -56,21 +56,21 @@ TEST(ParserDictionaryDDL, SimpleDictionary) EXPECT_EQ(create->dictionary->source->name, "clickhouse"); auto children = create->dictionary->source->elements->children; EXPECT_EQ(children[0]->as() -> first, "host"); - EXPECT_EQ(children[0]->as()->second->as()->value.get(), "localhost"); + EXPECT_EQ(children[0]->as()->second->as()->value.safeGet(), "localhost"); EXPECT_EQ(children[1]->as()->first, "port"); - EXPECT_EQ(children[1]->as()->second->as()->value.get(), 9000); + EXPECT_EQ(children[1]->as()->second->as()->value.safeGet(), 9000); EXPECT_EQ(children[2]->as()->first, "user"); - EXPECT_EQ(children[2]->as()->second->as()->value.get(), "default"); + EXPECT_EQ(children[2]->as()->second->as()->value.safeGet(), "default"); EXPECT_EQ(children[3]->as()->first, "password"); - EXPECT_EQ(children[3]->as()->second->as()->value.get(), ""); + EXPECT_EQ(children[3]->as()->second->as()->value.safeGet(), ""); EXPECT_EQ(children[4]->as()->first, "db"); - EXPECT_EQ(children[4]->as()->second->as()->value.get(), "test"); + EXPECT_EQ(children[4]->as()->second->as()->value.safeGet(), "test"); EXPECT_EQ(children[5]->as()->first, "table"); - EXPECT_EQ(children[5]->as()->second->as()->value.get(), "table_for_dict"); + EXPECT_EQ(children[5]->as()->second->as()->value.safeGet(), "table_for_dict"); /// layout test auto * layout = create->dictionary->layout; @@ -102,9 +102,9 @@ TEST(ParserDictionaryDDL, SimpleDictionary) EXPECT_EQ(attributes_children[1]->as()->name, "second_column"); EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); - EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.get(), 0); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.safeGet(), 0); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -150,8 +150,8 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); EXPECT_EQ(attributes_children[0]->as()->default_value, nullptr); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -195,9 +195,9 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) EXPECT_EQ(attributes_children[1]->as()->name, "second_column"); EXPECT_EQ(attributes_children[2]->as()->name, "third_column"); - EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.get(), 100); - EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.get(), 1); - EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.get(), 2); + EXPECT_EQ(attributes_children[0]->as()->default_value->as()->value.safeGet(), 100); + EXPECT_EQ(attributes_children[1]->as()->default_value->as()->value.safeGet(), 1); + EXPECT_EQ(attributes_children[2]->as()->default_value->as()->value.safeGet(), 2); EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); @@ -248,25 +248,25 @@ TEST(ParserDictionaryDDL, NestedSource) auto children = create->dictionary->source->elements->children; EXPECT_EQ(children[0]->as()->first, "host"); - EXPECT_EQ(children[0]->as()->second->as()->value.get(), "localhost"); + EXPECT_EQ(children[0]->as()->second->as()->value.safeGet(), "localhost"); EXPECT_EQ(children[1]->as()->first, "port"); - EXPECT_EQ(children[1]->as()->second->as()->value.get(), 9000); + EXPECT_EQ(children[1]->as()->second->as()->value.safeGet(), 9000); EXPECT_EQ(children[2]->as()->first, "user"); - EXPECT_EQ(children[2]->as()->second->as()->value.get(), "default"); + EXPECT_EQ(children[2]->as()->second->as()->value.safeGet(), "default"); EXPECT_EQ(children[3]->as()->first, "replica"); auto replica = children[3]->as()->second->children; EXPECT_EQ(replica[0]->as()->first, "host"); - EXPECT_EQ(replica[0]->as()->second->as()->value.get(), "127.0.0.1"); + EXPECT_EQ(replica[0]->as()->second->as()->value.safeGet(), "127.0.0.1"); EXPECT_EQ(replica[1]->as()->first, "priority"); - EXPECT_EQ(replica[1]->as()->second->as()->value.get(), 1); + EXPECT_EQ(replica[1]->as()->second->as()->value.safeGet(), 1); EXPECT_EQ(children[4]->as()->first, "password"); - EXPECT_EQ(children[4]->as()->second->as()->value.get(), ""); + EXPECT_EQ(children[4]->as()->second->as()->value.safeGet(), ""); } diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 58bf4c1a2fc..30301b242db 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -185,7 +185,7 @@ namespace DB } else { - auto value = static_cast(column[value_i].get>().getValue()); + auto value = static_cast(column[value_i].safeGet>().getValue()); if (need_rescale) { if (common::mulOverflow(value, rescale_multiplier, value)) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 06e8668cd7c..566a036d79c 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -208,20 +208,20 @@ private: /// Do not replace empty array and array of NULLs if (literal->value.getType() == Field::Types::Array) { - const Array & array = literal->value.get(); + const Array & array = literal->value.safeGet(); auto not_null = std::find_if_not(array.begin(), array.end(), [](const auto & elem) { return elem.isNull(); }); if (not_null == array.end()) return true; } else if (literal->value.getType() == Field::Types::Map) { - const Map & map = literal->value.get(); + const Map & map = literal->value.safeGet(); if (map.size() % 2) return false; } else if (literal->value.getType() == Field::Types::Tuple) { - const Tuple & tuple = literal->value.get(); + const Tuple & tuple = literal->value.safeGet(); for (const auto & value : tuple) if (value.isNull()) diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 649721f28bf..58bec8120f1 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -262,7 +262,7 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy { case orc::BOOLEAN: { /// May throw exception - auto val = field.get(); + auto val = field.safeGet(); return orc::Literal(val != 0); } case orc::BYTE: @@ -275,7 +275,7 @@ convertFieldToORCLiteral(const orc::Type & orc_type, const Field & field, DataTy /// SELECT * FROM file('t.orc', ORC, 'x UInt8') WHERE x > 10 /// We have to reject this, otherwise it would miss values > 127 (because /// they're treated as negative by ORC). - auto val = field.get(); + auto val = field.safeGet(); return orc::Literal(val); } case orc::FLOAT: diff --git a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp index 3578401a0f8..b43c195f201 100644 --- a/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrometheusTextOutputFormat.cpp @@ -286,10 +286,10 @@ static void columnMapToContainer(const ColumnMap * col_map, size_t row_num, Cont { Field field; col_map->get(row_num, field); - const auto & map_field = field.get(); + const auto & map_field = field.safeGet(); for (const auto & map_element : map_field) { - const auto & map_entry = map_element.get(); + const auto & map_entry = map_element.safeGet(); String entry_key; String entry_value; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index de34a8aa04f..9839f64b947 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -333,7 +333,7 @@ namespace { const DataTypeTuple & type_tuple = static_cast(data_type); - Tuple & tuple_value = value.get(); + Tuple & tuple_value = value.safeGet(); size_t src_tuple_size = tuple_value.size(); size_t dst_tuple_size = type_tuple.getElements().size(); @@ -360,7 +360,7 @@ namespace if (element_type.isNullable()) return; - Array & array_value = value.get(); + Array & array_value = value.safeGet(); size_t array_value_size = array_value.size(); for (size_t i = 0; i < array_value_size; ++i) @@ -378,12 +378,12 @@ namespace const auto & key_type = *type_map.getKeyType(); const auto & value_type = *type_map.getValueType(); - auto & map = value.get(); + auto & map = value.safeGet(); size_t map_size = map.size(); for (size_t i = 0; i < map_size; ++i) { - auto & map_entry = map[i].get(); + auto & map_entry = map[i].safeGet(); auto & entry_key = map_entry[0]; auto & entry_value = map_entry[1]; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index e2c6371c44f..80c00f91d82 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -127,14 +127,14 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, Row right(left.size()); for (size_t col_num : desc.key_col_nums) - right[col_num] = (*raw_columns[col_num])[row_number].template get(); + right[col_num] = (*raw_columns[col_num])[row_number].template safeGet(); for (size_t col_num : desc.val_col_nums) - right[col_num] = (*raw_columns[col_num])[row_number].template get(); + right[col_num] = (*raw_columns[col_num])[row_number].template safeGet(); auto at_ith_column_jth_row = [&](const Row & matrix, size_t i, size_t j) -> const Field & { - return matrix[i].get()[j]; + return matrix[i].safeGet()[j]; }; auto tuple_of_nth_columns_at_jth_row = [&](const Row & matrix, const ColumnNumbers & col_nums, size_t j) -> Array @@ -160,7 +160,7 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, auto merge = [&](const Row & matrix) { - size_t rows = matrix[desc.key_col_nums[0]].get().size(); + size_t rows = matrix[desc.key_col_nums[0]].safeGet().size(); for (size_t j = 0; j < rows; ++j) { @@ -190,10 +190,10 @@ static bool mergeMap(const SummingSortedAlgorithm::MapDescription & desc, for (const auto & key_value : merged) { for (size_t col_num_index = 0, size = desc.key_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.key_col_nums[col_num_index]].get()[row_num] = key_value.first[col_num_index]; + row[desc.key_col_nums[col_num_index]].safeGet()[row_num] = key_value.first[col_num_index]; for (size_t col_num_index = 0, size = desc.val_col_nums.size(); col_num_index < size; ++col_num_index) - row[desc.val_col_nums[col_num_index]].get()[row_num] = key_value.second[col_num_index]; + row[desc.val_col_nums[col_num_index]].safeGet()[row_num] = key_value.second[col_num_index]; ++row_num; } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 2080e29ceba..596d08845e1 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -119,23 +119,23 @@ using RangesWithStep = std::vector; std::optional steppedRangeFromRange(const Range & r, UInt64 step, UInt64 remainder) { - if ((r.right.get() == 0) && (!r.right_included)) + if ((r.right.safeGet() == 0) && (!r.right_included)) return std::nullopt; - UInt64 begin = (r.left.get() / step) * step; + UInt64 begin = (r.left.safeGet() / step) * step; if (begin > std::numeric_limits::max() - remainder) return std::nullopt; begin += remainder; - while ((r.left_included <= r.left.get()) && (begin <= r.left.get() - r.left_included)) + while ((r.left_included <= r.left.safeGet()) && (begin <= r.left.safeGet() - r.left_included)) { if (std::numeric_limits::max() - step < begin) return std::nullopt; begin += step; } - if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) + if ((begin >= r.right_included) && (begin - r.right_included >= r.right.safeGet())) return std::nullopt; - UInt64 right_edge_included = r.right.get() - (1 - r.right_included); + UInt64 right_edge_included = r.right.safeGet() - (1 - r.right_included); return std::optional{RangeWithStep{begin, step, static_cast(right_edge_included - begin) / step + 1}}; } diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 5d533a7747e..52be9a6e84a 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -219,11 +219,11 @@ namespace read_bytes_size += 8; break; case ValueType::vtEnum8: - assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).get()); + assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).safeGet()); read_bytes_size += assert_cast(column).byteSize(); break; case ValueType::vtEnum16: - assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).get()); + assert_cast(column).insertValue(assert_cast &>(data_type).castToValue(value.data()).safeGet()); read_bytes_size += assert_cast(column).byteSize(); break; case ValueType::vtString: diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 9601f821cc8..95f4a674ebb 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -62,7 +62,7 @@ static FillColumnDescription::StepFunction getStepFunction( case IntervalKind::Kind::NAME: \ return [step, scale, &date_lut](Field & field) { \ field = Add##NAME##sImpl::execute(static_cast(\ - field.get()), static_cast(step), date_lut, utc_time_zone, scale); }; + field.safeGet()), static_cast(step), date_lut, utc_time_zone, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE @@ -139,21 +139,21 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & { if (which.isDate() || which.isDate32()) { - Int64 avg_seconds = descr.fill_step.get() * descr.step_kind->toAvgSeconds(); + Int64 avg_seconds = descr.fill_step.safeGet() * descr.step_kind->toAvgSeconds(); if (std::abs(avg_seconds) < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Value of step is to low ({} seconds). Must be >= 1 day", std::abs(avg_seconds)); } if (which.isDate()) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), DateLUT::instance()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), DateLUT::instance()); else if (which.isDate32()) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), DateLUT::instance()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), DateLUT::instance()); else if (const auto * date_time = checkAndGetDataType(type.get())) - descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.get(), date_time->getTimeZone()); + descr.step_func = getStepFunction(*descr.step_kind, descr.fill_step.safeGet(), date_time->getTimeZone()); else if (const auto * date_time64 = checkAndGetDataType(type.get())) { - const auto & step_dec = descr.fill_step.get &>(); + const auto & step_dec = descr.fill_step.safeGet &>(); Int64 step = DecimalUtils::convertTo(step_dec.getValue(), step_dec.getScale()); static const DateLUTImpl & utc_time_zone = DateLUT::instance("UTC"); @@ -163,7 +163,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & case IntervalKind::Kind::NAME: \ descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ - auto field_decimal = field.get>(); \ + auto field_decimal = field.safeGet>(); \ auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, utc_time_zone, field_decimal.getScale()); \ field = DecimalField(res, field_decimal.getScale()); \ }; \ diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index f76e2d64368..32066adad0d 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -107,7 +107,7 @@ static int compareValuesWithOffset(const IColumn * _compared_column, using ValueType = typename ColumnType::ValueType; // Note that the storage type of offset returned by get<> is different, so // we need to specify the type explicitly. - const ValueType offset = static_cast(_offset.get()); + const ValueType offset = static_cast(_offset.safeGet()); assert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); @@ -162,7 +162,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, _compared_column); const auto * reference_column = assert_cast( _reference_column); - const auto offset = _offset.get(); + const auto offset = _offset.safeGet(); chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); @@ -631,7 +631,7 @@ void WindowTransform::advanceFrameStartRowsOffset() { // Just recalculate it each time by walking blocks. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.begin_offset.get() + window_description.frame.begin_offset.safeGet() * (window_description.frame.begin_preceding ? -1 : 1)); frame_start = moved_row; @@ -870,7 +870,7 @@ void WindowTransform::advanceFrameEndRowsOffset() // Walk the specified offset from the current row. The "+1" is needed // because the frame_end is a past-the-end pointer. const auto [moved_row, offset_left] = moveRowNumber(current_row, - window_description.frame.end_offset.get() + window_description.frame.end_offset.safeGet() * (window_description.frame.end_preceding ? -1 : 1) + 1); @@ -2192,13 +2192,13 @@ namespace throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of 'ntile' function must be a constant"); auto type_id = argument_types[0]->getTypeId(); if (type_id == TypeIndex::UInt8) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt16) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt32) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); else if (type_id == TypeIndex::UInt64) - buckets = arg_col[transform->current_row.row].get(); + buckets = arg_col[transform->current_row.row].safeGet(); if (!buckets) { @@ -2490,7 +2490,7 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction { offset = (*current_block.input_columns[ workspace.argument_column_indices[1]])[ - transform->current_row.row].get(); + transform->current_row.row].safeGet(); /// Either overflow or really negative value, both is not acceptable. if (offset < 0) @@ -2576,7 +2576,7 @@ struct WindowFunctionNthValue final : public WindowFunction Int64 offset = (*current_block.input_columns[ workspace.argument_column_indices[1]])[ - transform->current_row.row].get(); + transform->current_row.row].safeGet(); /// Either overflow or really negative value, both is not acceptable. if (offset <= 0) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7891042bb96..4869819d0d6 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -109,7 +109,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = typeid_cast(*ast_col_decl.comment); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); } if (ast_col_decl.codec) @@ -167,7 +167,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.comment) { const auto & ast_comment = ast_col_decl.comment->as(); - command.comment.emplace(ast_comment.value.get()); + command.comment.emplace(ast_comment.value.safeGet()); } if (ast_col_decl.ttl) @@ -210,7 +210,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.type = COMMENT_COLUMN; command.column_name = getIdentifierName(command_ast->column); const auto & ast_comment = command_ast->comment->as(); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); command.if_exists = command_ast->if_exists; return command; } @@ -220,7 +220,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.ast = command_ast->clone(); command.type = COMMENT_TABLE; const auto & ast_comment = command_ast->comment->as(); - command.comment = ast_comment.value.get(); + command.comment = ast_comment.value.safeGet(); return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_ORDER_BY) diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index da749812167..0d724245b49 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -197,7 +197,7 @@ void ColumnDescription::readText(ReadBuffer & buf) } if (col_ast->comment) - comment = col_ast->comment->as().value.get(); + comment = col_ast->comment->as().value.safeGet(); if (col_ast->codec) codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true, true); diff --git a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp index 7354243732c..d6a8af3238e 100644 --- a/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp +++ b/src/Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.cpp @@ -268,7 +268,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Float64) { out.function = RPNElement::FUNCTION_FLOAT_LITERAL; - out.float_literal.emplace(const_value.get()); + out.float_literal.emplace(const_value.safeGet()); out.func_name = "Float literal"; return true; } @@ -276,7 +276,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::UInt64) { out.function = RPNElement::FUNCTION_INT_LITERAL; - out.int_literal.emplace(const_value.get()); + out.int_literal.emplace(const_value.safeGet()); out.func_name = "Int literal"; return true; } @@ -284,7 +284,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Int64) { out.function = RPNElement::FUNCTION_INT_LITERAL; - out.int_literal.emplace(const_value.get()); + out.int_literal.emplace(const_value.safeGet()); out.func_name = "Int literal"; return true; } @@ -292,7 +292,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Tuple) { out.function = RPNElement::FUNCTION_LITERAL_TUPLE; - out.tuple_literal = const_value.get(); + out.tuple_literal = const_value.safeGet(); out.func_name = "Tuple literal"; return true; } @@ -300,7 +300,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::Array) { out.function = RPNElement::FUNCTION_LITERAL_ARRAY; - out.array_literal = const_value.get(); + out.array_literal = const_value.safeGet(); out.func_name = "Array literal"; return true; } @@ -308,7 +308,7 @@ bool ApproximateNearestNeighborCondition::tryCastToConstType(const ASTPtr & node if (const_value.getType() == Field::Types::String) { out.function = RPNElement::FUNCTION_STRING_LITERAL; - out.func_name = const_value.get(); + out.func_name = const_value.safeGet(); return true; } } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3a44359b537..0db05373e43 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -428,7 +428,7 @@ std::pair IMergeTreeDataPart::getMinMaxDate() const if (storage.minmax_idx_date_column_pos != -1 && minmax_idx->initialized) { const auto & hyperrectangle = minmax_idx->hyperrectangle[storage.minmax_idx_date_column_pos]; - return {DayNum(hyperrectangle.left.get()), DayNum(hyperrectangle.right.get())}; + return {DayNum(hyperrectangle.left.safeGet()), DayNum(hyperrectangle.right.safeGet())}; } else return {}; @@ -444,15 +444,15 @@ std::pair IMergeTreeDataPart::getMinMaxTime() const if (hyperrectangle.left.getType() == Field::Types::UInt64) { assert(hyperrectangle.right.getType() == Field::Types::UInt64); - return {hyperrectangle.left.get(), hyperrectangle.right.get()}; + return {hyperrectangle.left.safeGet(), hyperrectangle.right.safeGet()}; } /// The case of DateTime64 else if (hyperrectangle.left.getType() == Field::Types::Decimal64) { assert(hyperrectangle.right.getType() == Field::Types::Decimal64); - auto left = hyperrectangle.left.get>(); - auto right = hyperrectangle.right.get>(); + auto left = hyperrectangle.left.safeGet>(); + auto right = hyperrectangle.right.safeGet>(); assert(left.getScale() == right.getScale()); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 69bffac9160..a717c08f0a2 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -349,7 +349,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = extractFixedPrefixFromLikePattern(value.get(), /*requires_perfect_prefix*/ false); + String prefix = extractFixedPrefixFromLikePattern(value.safeGet(), /*requires_perfect_prefix*/ false); if (prefix.empty()) return false; @@ -370,7 +370,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = extractFixedPrefixFromLikePattern(value.get(), /*requires_perfect_prefix*/ true); + String prefix = extractFixedPrefixFromLikePattern(value.safeGet(), /*requires_perfect_prefix*/ true); if (prefix.empty()) return false; @@ -391,7 +391,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - String prefix = value.get(); + String prefix = value.safeGet(); if (prefix.empty()) return false; @@ -412,7 +412,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map if (value.getType() != Field::Types::String) return false; - const String & expression = value.get(); + const String & expression = value.safeGet(); /// This optimization can't process alternation - this would require /// a comprehensive parsing of regular expression. @@ -2918,8 +2918,8 @@ BoolMask KeyCondition::checkInHyperrectangle( /// Let's support only the case of 2d, because I'm not confident in other cases. if (num_dimensions == 2) { - UInt64 left = key_range.left.get(); - UInt64 right = key_range.right.get(); + UInt64 left = key_range.left.safeGet(); + UInt64 right = key_range.right.safeGet(); BoolMask mask(false, true); auto hyperrectangle_intersection_callback = [&](std::array, 2> curve_hyperrectangle) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2e10f5a0227..830f6f5d52f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5869,7 +5869,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc if (partition_lit && partition_lit->value.getType() == Field::Types::String) { MergeTreePartInfo::validatePartitionID(partition_ast.value->clone(), format_version); - return partition_lit->value.get(); + return partition_lit->value.safeGet(); } } @@ -5932,7 +5932,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", partition_key_value.getTypeName()); - const Tuple & tuple = partition_key_value.get(); + const Tuple & tuple = partition_key_value.safeGet(); if (tuple.size() != fields_count) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of fields in the partition expression: {}, must be: {}", tuple.size(), fields_count); @@ -6868,7 +6868,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( auto * place = arena.alignedAlloc(size_of_state, align_of_state); func->create(place); if (const AggregateFunctionCount * agg_count = typeid_cast(func.get())) - AggregateFunctionCount::set(place, value.get()); + AggregateFunctionCount::set(place, value.safeGet()); else { auto value_column = func->getArgumentTypes().front()->createColumnConst(1, value)->convertToFullColumnIfConst(); @@ -7510,7 +7510,7 @@ MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & parti if (const auto * partition_lit = partition_ast->as().value->as()) { id = partition_lit->value.getType() == Field::Types::UInt64 - ? toString(partition_lit->value.get()) + ? toString(partition_lit->value.safeGet()) : partition_lit->value.safeGet(); prefixed = true; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index ee3ac4207cc..98f1563546e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -441,8 +441,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.get()); - DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.get()); + DayNum min_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].left.safeGet()); + DayNum max_date(minmax_idx->hyperrectangle[data.minmax_idx_date_column_pos].right.safeGet()); const auto & date_lut = DateLUT::serverTimezoneInstance(); diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 497e86334f3..b68e48eeb3a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -343,12 +343,12 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2; String distance_function = DEFAULT_DISTANCE_FUNCTION; if (!index.arguments.empty()) - distance_function = index.arguments[0].get(); + distance_function = index.arguments[0].safeGet(); static constexpr auto DEFAULT_TREES = 100uz; UInt64 trees = DEFAULT_TREES; if (index.arguments.size() > 1) - trees = index.arguments[1].get(); + trees = index.arguments[1].safeGet(); return std::make_shared(index, trees, distance_function); } @@ -375,7 +375,7 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { - String distance_name = index.arguments[0].get(); + String distance_name = index.arguments[0].safeGet(); if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index c6a00751f25..be0ee693e15 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -348,19 +348,19 @@ bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTre { if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -692,7 +692,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( const bool is_nullable = actual_type->isNullable(); auto mutable_column = actual_type->createColumn(); - for (const auto & f : value_field.get()) + for (const auto & f : value_field.safeGet()) { if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) /// NOLINT(readability-static-accessed-through-instance) return false; @@ -763,7 +763,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (which.isTuple() && key_node_function_name == "tuple") { - const Tuple & tuple = value_field.get(); + const Tuple & tuple = value_field.safeGet(); const auto * value_tuple_data_type = typeid_cast(value_type.get()); if (tuple.size() != key_node_function_arguments_size) @@ -952,7 +952,7 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool attach) { const auto & argument = index.arguments[0]; - if (!attach && (argument.getType() != Field::Types::Float64 || argument.get() < 0 || argument.get() > 1)) + if (!attach && (argument.getType() != Field::Types::Float64 || argument.safeGet() < 0 || argument.safeGet() > 1)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The BloomFilter false positive must be a double number between 0 and 1."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 5b6813d12e3..857b7903588 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -341,19 +341,19 @@ bool MergeTreeConditionBloomFilterText::extractAtomFromTree(const RPNBuilderTree if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.0 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -493,7 +493,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - auto value = const_value.get(); + auto value = const_value.safeGet(); if (is_case_insensitive_scenario) std::ranges::transform(value, value.begin(), [](const auto & c) { return static_cast(std::tolower(c)); }); @@ -509,7 +509,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -519,7 +519,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_HAS; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -529,7 +529,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -538,7 +538,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -547,7 +547,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -556,7 +556,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToBloomFilter(value.data(), value.size(), *out.bloom_filter); return true; } @@ -565,7 +565,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, true, false); return true; } @@ -574,7 +574,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.key_column = *key_index; out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToBloomFilter(value.data(), value.size(), *out.bloom_filter, false, true); return true; } @@ -589,13 +589,13 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( /// 2d vector is not needed here but is used because already exists for FUNCTION_IN std::vector> bloom_filters; bloom_filters.emplace_back(); - for (const auto & element : const_value.get()) + for (const auto & element : const_value.safeGet()) { if (element.getType() != Field::Types::String) return false; bloom_filters.back().emplace_back(params); - const auto & value = element.get(); + const auto & value = element.safeGet(); if (function_name == "multiSearchAny") { @@ -615,7 +615,7 @@ bool MergeTreeConditionBloomFilterText::traverseTreeEquals( out.function = RPNElement::FUNCTION_MATCH; out.bloom_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); String required_substring; bool dummy_is_trivial, dummy_required_substring_is_prefix; std::vector alternatives; @@ -743,11 +743,11 @@ MergeTreeIndexPtr bloomFilterIndexTextCreator( { if (index.type == NgramTokenExtractor::getName()) { - size_t n = index.arguments[0].get(); + size_t n = index.arguments[0].safeGet(); BloomFilterParameters params( - index.arguments[1].get(), - index.arguments[2].get(), - index.arguments[3].get()); + index.arguments[1].safeGet(), + index.arguments[2].safeGet(), + index.arguments[3].safeGet()); auto tokenizer = std::make_unique(n); @@ -756,9 +756,9 @@ MergeTreeIndexPtr bloomFilterIndexTextCreator( else if (index.type == SplitTokenExtractor::getName()) { BloomFilterParameters params( - index.arguments[0].get(), - index.arguments[1].get(), - index.arguments[2].get()); + index.arguments[0].safeGet(), + index.arguments[1].safeGet(), + index.arguments[2].safeGet()); auto tokenizer = std::make_unique(); @@ -815,9 +815,9 @@ void bloomFilterIndexTextValidator(const IndexDescription & index, bool /*attach /// Just validate BloomFilterParameters params( - index.arguments[0].get(), - index.arguments[1].get(), - index.arguments[2].get()); + index.arguments[0].safeGet(), + index.arguments[1].safeGet(), + index.arguments[2].safeGet()); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index cd6af68ebcc..b5c6bb95d37 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -74,7 +74,7 @@ void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr, MergeTr for (auto & gin_filter : gin_filters) { size_serialization->deserializeBinary(field_rows, istr, {}); - size_t filter_size = field_rows.get(); + size_t filter_size = field_rows.safeGet(); gin_filter.getFilter().resize(filter_size); if (filter_size == 0) @@ -379,19 +379,19 @@ bool MergeTreeConditionFullText::traverseAtomAST(const RPNBuilderTreeNode & node /// Check constant like in KeyCondition if (const_value.getType() == Field::Types::UInt64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Int64) { - out.function = const_value.get() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } if (const_value.getType() == Field::Types::Float64) { - out.function = const_value.get() != 0.00 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; + out.function = const_value.safeGet() != 0.00 ? RPNElement::ALWAYS_TRUE : RPNElement::ALWAYS_FALSE; return true; } } @@ -530,7 +530,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_HAS; out.gin_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -539,7 +539,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_HAS; out.gin_filter = std::make_unique(params); - auto & value = const_value.get(); + auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -549,7 +549,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -558,7 +558,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -567,7 +567,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -576,7 +576,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_NOT_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringLikeToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -585,7 +585,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->stringToGinFilter(value.data(), value.size(), *out.gin_filter); return true; } @@ -594,7 +594,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, true, false); return true; } @@ -603,7 +603,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_EQUALS; out.gin_filter = std::make_unique(params); - const auto & value = const_value.get(); + const auto & value = const_value.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), *out.gin_filter, false, true); return true; } @@ -615,13 +615,13 @@ bool MergeTreeConditionFullText::traverseASTEquals( /// 2d vector is not needed here but is used because already exists for FUNCTION_IN std::vector gin_filters; gin_filters.emplace_back(); - for (const auto & element : const_value.get()) + for (const auto & element : const_value.safeGet()) { if (element.getType() != Field::Types::String) return false; gin_filters.back().emplace_back(params); - const auto & value = element.get(); + const auto & value = element.safeGet(); token_extractor->substringToGinFilter(value.data(), value.size(), gin_filters.back().back(), false, false); } out.set_gin_filters = std::move(gin_filters); @@ -632,7 +632,7 @@ bool MergeTreeConditionFullText::traverseASTEquals( out.key_column = key_column_num; out.function = RPNElement::FUNCTION_MATCH; - auto & value = const_value.get(); + auto & value = const_value.safeGet(); String required_substring; bool dummy_is_trivial, dummy_required_substring_is_prefix; std::vector alternatives; @@ -776,8 +776,8 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( MergeTreeIndexPtr fullTextIndexCreator( const IndexDescription & index) { - size_t n = index.arguments.empty() ? 0 : index.arguments[0].get(); - UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get(); + size_t n = index.arguments.empty() ? 0 : index.arguments[0].safeGet(); + UInt64 max_rows = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].safeGet(); GinFilterParameters params(n, max_rows); /// Use SplitTokenExtractor when n is 0, otherwise use NgramTokenExtractor @@ -826,12 +826,12 @@ void fullTextIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.arguments[1].getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INCORRECT_QUERY, "The second full text index argument must be UInt64"); - if (index.arguments[1].get() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].get() < MIN_ROWS_PER_POSTINGS_LIST) + if (index.arguments[1].safeGet() != UNLIMITED_ROWS_PER_POSTINGS_LIST && index.arguments[1].safeGet() < MIN_ROWS_PER_POSTINGS_LIST) throw Exception(ErrorCodes::INCORRECT_QUERY, "The maximum rows per postings list must be no less than {}", MIN_ROWS_PER_POSTINGS_LIST); } /// Just validate - size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); - UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].get(); + size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].safeGet(); + UInt64 max_rows_per_postings_list = index.arguments.size() < 2 ? DEFAULT_MAX_ROWS_PER_POSTINGS_LIST : index.arguments[1].safeGet(); GinFilterParameters params(ngrams, max_rows_per_postings_list); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index cd8065ecadf..abf3ae56376 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -37,7 +37,7 @@ void MergeTreeIndexGranuleHypothesis::deserializeBinary(ReadBuffer & istr, Merge Field field_met; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_met, istr, {}); - met = field_met.get(); + met = field_met.safeGet(); is_empty = false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index a92df4ac72d..fa242fccbc1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -97,7 +97,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); - size_t rows_to_read = field_rows.get(); + size_t rows_to_read = field_rows.safeGet(); if (rows_to_read == 0) return; @@ -591,7 +591,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) { - size_t max_rows = index.arguments[0].get(); + size_t max_rows = index.arguments[0].safeGet(); return std::make_shared(index, max_rows); } diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index 59a4b0fbf9c..efd9bb754e1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -377,12 +377,12 @@ MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) static constexpr auto default_distance_function = DISTANCE_FUNCTION_L2; String distance_function = default_distance_function; if (!index.arguments.empty()) - distance_function = index.arguments[0].get(); + distance_function = index.arguments[0].safeGet(); static constexpr auto default_scalar_kind = unum::usearch::scalar_kind_t::f16_k; auto scalar_kind = default_scalar_kind; if (index.arguments.size() > 1) - scalar_kind = nameToScalarKind.at(index.arguments[1].get()); + scalar_kind = nameToScalarKind.at(index.arguments[1].safeGet()); return std::make_shared(index, distance_function, scalar_kind); } @@ -408,14 +408,14 @@ void usearchIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { - String distance_name = index.arguments[0].get(); + String distance_name = index.arguments[0].safeGet(); if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) throw Exception(ErrorCodes::INCORRECT_DATA, "USearch index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } /// Check that a supported kind was passed as a second argument - if (index.arguments.size() > 1 && !nameToScalarKind.contains(index.arguments[1].get())) + if (index.arguments.size() > 1 && !nameToScalarKind.contains(index.arguments[1].safeGet())) { String supported_kinds; for (const auto & [name, kind] : nameToScalarKind) diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index b240f80ee13..5b5bc244f92 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -241,7 +241,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::serverTimezoneInstance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); else if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) - result += toString(value[i].get().toUnderType()); + result += toString(value[i].safeGet().toUnderType()); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 43c40dee77d..f0c26c302e1 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -222,17 +222,17 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet /// check the value with respect to threshold if (type == Field::Types::UInt64) { - const auto value = output_value.get(); + const auto value = output_value.safeGet(); return value > threshold; } else if (type == Field::Types::Int64) { - const auto value = output_value.get(); + const auto value = output_value.safeGet(); return value < -threshold || threshold < value; } else if (type == Field::Types::Float64) { - const auto value = output_value.get(); + const auto value = output_value.safeGet(); return value < -threshold || threshold < value; } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 3f0603f6900..52857845e99 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -591,7 +591,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (ast->value.getType() != Field::Types::String) throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); - graphite_config_name = ast->value.get(); + graphite_config_name = ast->value.safeGet(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index ade3326288a..fd9f29b6375 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -32,19 +32,19 @@ std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) switch (field.getType()) { case Field::Types::Int64: - return field.get(); + return field.safeGet(); case Field::Types::UInt64: - return field.get(); + return field.safeGet(); case Field::Types::Float64: - return field.get(); + return field.safeGet(); case Field::Types::Int128: - return field.get(); + return field.safeGet(); case Field::Types::UInt128: - return field.get(); + return field.safeGet(); case Field::Types::Int256: - return field.get(); + return field.safeGet(); case Field::Types::UInt256: - return field.get(); + return field.safeGet(); default: return {}; } @@ -53,7 +53,7 @@ std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) std::optional StatisticsUtils::tryConvertToString(const DB::Field & field) { if (field.getType() == Field::Types::String) - return field.get(); + return field.safeGet(); return {}; } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index e69bbc1515b..a6f2f883e65 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -48,7 +48,7 @@ Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); if (isStringOrFixedString(data_type)) - return sketch.get_estimate(val.get()); + return sketch.get_estimate(val.safeGet()); throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); } diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index 060b271d8f4..b95ccedb093 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -203,7 +203,7 @@ StoragePtr StorageFactory::get( } if (query.comment) - comment = query.comment->as().value.get(); + comment = query.comment->as().value.safeGet(); ASTs empty_engine_args; Arguments arguments{ diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 98cd5c4dfa9..20b02b59a10 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -2203,11 +2203,11 @@ void registerStorageFile(StorageFactory & factory) { auto type = literal->value.getType(); if (type == Field::Types::Int64) - source_fd = static_cast(literal->value.get()); + source_fd = static_cast(literal->value.safeGet()); else if (type == Field::Types::UInt64) - source_fd = static_cast(literal->value.get()); + source_fd = static_cast(literal->value.safeGet()); else if (type == Field::Types::String) - StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive); + StorageFile::parseFileSource(literal->value.safeGet(), source_path, storage_args.path_to_archive); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 9950d41f1c2..fc73f246d35 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -419,7 +419,7 @@ void fuzzJSONObject( if (val.fixed->getType() == Field::Types::Which::String) { out << fuzzJSONStructure(config, rnd, "\""); - writeText(val.fixed->get(), out); + writeText(val.fixed->safeGet(), out); out << fuzzJSONStructure(config, rnd, "\""); } else diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index a0d6cf11b64..136df425813 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -341,10 +341,10 @@ void registerStorageJoin(StorageFactory & factory) else if (setting.name == "any_join_distinct_right_table_keys") old_any_join = setting.value; else if (setting.name == "disk") - disk_name = setting.value.get(); + disk_name = setting.value.safeGet(); else if (setting.name == "persistent") { - persistent = setting.value.get(); + persistent = setting.value.safeGet(); } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown setting {} for storage {}", setting.name, args.engine_name); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index d3214e7ed13..e0a4af68824 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -294,7 +294,7 @@ public: { const auto * array_type = typeid_cast(data_type.get()); const auto & nested = array_type->getNestedType(); - const auto & array = array_field.get(); + const auto & array = array_field.safeGet(); if (!isArray(nested)) { @@ -312,7 +312,7 @@ public: if (!isArray(nested_array_type->getNestedType())) { - parseArrayContent(iter->get(), nested, ostr); + parseArrayContent(iter->safeGet(), nested, ostr); } else { diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6a7810b97f9..522884b4fef 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -105,8 +105,8 @@ protected: while (rows_count < max_block_size && db_table_num < total_tables) { - const std::string database_name = (*databases)[db_table_num].get(); - const std::string table_name = (*tables)[db_table_num].get(); + const std::string database_name = (*databases)[db_table_num].safeGet(); + const std::string table_name = (*tables)[db_table_num].safeGet(); ++db_table_num; ColumnsDescription columns; @@ -426,7 +426,7 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, for (size_t i = 0; i < num_databases; ++i) { - const std::string database_name = (*database_column)[i].get(); + const std::string database_name = (*database_column)[i].safeGet(); if (database_name.empty()) { for (auto & [table_name, table] : external_tables) diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 7ace8ee24aa..c87bdb6d26a 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -138,7 +138,7 @@ StoragesInfoStream::StoragesInfoStream(std::optional filter_by_datab for (size_t i = 0; i < rows; ++i) { - String database_name = (*database_column_for_filter)[i].get(); + String database_name = (*database_column_for_filter)[i].safeGet(); const DatabasePtr database = databases.at(database_name); offsets[i] = i ? offsets[i - 1] : 0; diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 806af4a7bf8..3be73aeda17 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -52,13 +52,13 @@ public: { StoragesInfo info; - info.database = (*database_column)[next_row].get(); - info.table = (*table_column)[next_row].get(); - UUID storage_uuid = (*storage_uuid_column)[next_row].get(); + info.database = (*database_column)[next_row].safeGet(); + info.table = (*table_column)[next_row].safeGet(); + UUID storage_uuid = (*storage_uuid_column)[next_row].safeGet(); auto is_same_table = [&storage_uuid, this] (size_t row) -> bool { - return (*storage_uuid_column)[row].get() == storage_uuid; + return (*storage_uuid_column)[row].safeGet() == storage_uuid; }; /// We may have two rows per table which differ in 'active' value. @@ -66,7 +66,7 @@ public: /// must collect the inactive parts. Remember this fact in StoragesInfo. for (; next_row < rows && is_same_table(next_row); ++next_row) { - const auto active = (*active_column)[next_row].get(); + const auto active = (*active_column)[next_row].safeGet(); if (active == 0) info.need_inactive_parts = true; } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 73b7908b75c..ea837da1e73 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -51,7 +51,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) const ColumnWithTypeAndName & data = block.getByName(name); size_t rows = block.rows(); for (size_t i = 0; i < rows; ++i) - res.insert((*data.column)[i].get()); + res.insert((*data.column)[i].safeGet()); return res; } diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 9d23f132759..1408e120bc5 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -102,16 +102,16 @@ ColumnsDescription getStructureOfRemoteTableInShard( { ColumnDescription column; - column.name = (*name)[i].get(); + column.name = (*name)[i].safeGet(); - String data_type_name = (*type)[i].get(); + String data_type_name = (*type)[i].safeGet(); column.type = data_type_factory.get(data_type_name); - String kind_name = (*default_kind)[i].get(); + String kind_name = (*default_kind)[i].safeGet(); if (!kind_name.empty()) { column.default_desc.kind = columnDefaultKindFromString(kind_name); - String expr_str = (*default_expr)[i].get(); + String expr_str = (*default_expr)[i].safeGet(); column.default_desc.expression = parseQuery( expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", 0, settings.max_parser_depth, settings.max_parser_backtracks); @@ -207,8 +207,8 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( size_t size = name_col.size(); for (size_t i = 0; i < size; ++i) { - auto name = name_col[i].get(); - auto type_name = type_col[i].get(); + auto name = name_col[i].safeGet(); + auto type_name = type_col[i].safeGet(); auto storage_column = storage_columns.tryGetPhysical(name); if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 552b9fde986..69d24c879bd 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -83,7 +83,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt "Table function '{}' requires a String argument for EXPLAIN kind, got '{}'", getName(), queryToString(kind_arg)); - ASTExplainQuery::ExplainKind kind = ASTExplainQuery::fromString(kind_literal->value.get()); + ASTExplainQuery::ExplainKind kind = ASTExplainQuery::fromString(kind_literal->value.safeGet()); auto explain_query = std::make_shared(kind); const auto * settings_arg = function->arguments->children[1]->as(); @@ -92,7 +92,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt "Table function '{}' requires a serialized string settings argument, got '{}'", getName(), queryToString(function->arguments->children[1])); - const auto & settings_str = settings_arg->value.get(); + const auto & settings_str = settings_arg->value.safeGet(); if (!settings_str.empty()) { const Settings & settings = context->getSettingsRef(); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 1b6d86f8fa5..e59ee52fd82 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -47,7 +47,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { fd = static_cast( - (type == Field::Types::Int64) ? literal->value.get() : literal->value.get()); + (type == Field::Types::Int64) ? literal->value.safeGet() : literal->value.safeGet()); if (fd < 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File descriptor must be non-negative"); } diff --git a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp index 06a48f0e25f..27ed50fb711 100644 --- a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp +++ b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp @@ -76,9 +76,9 @@ void TableFunctionMergeTreeIndex::parseArguments(const ASTPtr & ast_function, Co "Table function '{}' expected bool flag for 'with_marks' argument", getName()); if (value.getType() == Field::Types::Bool) - with_marks = value.get(); + with_marks = value.safeGet(); else - with_marks = value.get(); + with_marks = value.safeGet(); } if (!params.empty()) From 18d9bb2ade4e98051df007663a387eb74146c26f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 10 Aug 2024 16:25:32 +0200 Subject: [PATCH 101/132] tests: attempt to fix 01600_parts_states_metrics_long (by forbid parallel run) CI: https://s3.amazonaws.com/clickhouse-test-reports/68134/8d4f822fee64d44440459b733c67dee5e9fb1e02/stateless_tests__tsan__s3_storage__[2_4].html Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/01600_parts_states_metrics_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 47b5a4dea13..8062bb0ba5d 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From eeda67042c08bedbd18c3b7a76cb8928e9975348 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 10 Aug 2024 16:28:49 +0200 Subject: [PATCH 102/132] tests: make 01600_parts_states_metrics_long faster Signed-off-by: Azat Khuzhin --- .../01600_parts_states_metrics_long.sh | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 8062bb0ba5d..a07dd306b3e 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -5,6 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +function query() +{ + # NOTE: database_atomic_wait_for_drop_and_detach_synchronously needed only for local env, CI has it ON + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*" +} + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) @@ -18,13 +24,13 @@ verify() { for i in {1..5000} do - result=$( $CLICKHOUSE_CLIENT --query="$verify_sql" ) + result=$( query "$verify_sql" ) [ "$result" = "1" ] && echo "$result" && break sleep 0.1 if [[ $i -eq 5000 ]] then - $CLICKHOUSE_CLIENT " + query " SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; SELECT sum(active), sum(NOT active) FROM system.parts; SELECT sum(active), sum(NOT active) FROM system.projection_parts; @@ -34,17 +40,17 @@ verify() done } -$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" +query "DROP TABLE IF EXISTS test_table" +query "CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" -$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +query "INSERT INTO test_table VALUES ('1992-01-01')" verify -$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +query "INSERT INTO test_table VALUES ('1992-01-02')" verify -$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" +query "OPTIMIZE TABLE test_table FINAL" verify -$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +query "DROP TABLE test_table" verify From 0a8fb05ece2771439844c03456d43b02eb8f51cd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 10 Aug 2024 16:23:23 +0000 Subject: [PATCH 103/132] fix after merge --- src/Core/Field.h | 7 +++++++ src/Disks/DiskFomAST.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 689ac38a235..13741183f21 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -869,6 +869,13 @@ constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) || t == Field::Types::UInt64; } +constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) +{ + return t == Field::Types::Int64 + || t == Field::Types::UInt64 + || t == Field::Types::Bool; +} + template auto & Field::safeGet() { diff --git a/src/Disks/DiskFomAST.cpp b/src/Disks/DiskFomAST.cpp index b2f1280c507..5329ff8748a 100644 --- a/src/Disks/DiskFomAST.cpp +++ b/src/Disks/DiskFomAST.cpp @@ -132,7 +132,7 @@ std::string DiskFomAST::createCustomDisk(const ASTPtr & disk_function_ast, Conte FlattenDiskConfigurationVisitor::Data data{context, attach}; FlattenDiskConfigurationVisitor{data}.visit(ast); - return assert_cast(*ast).value.get(); + return assert_cast(*ast).value.safeGet(); } void DiskFomAST::ensureDiskIsNotCustom(const std::string & disk_name, ContextPtr context) From 0a536cbf150916bff0ec63c6ef3b77df517868f8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sat, 10 Aug 2024 18:58:28 +0200 Subject: [PATCH 104/132] Add batch size --- docker/test/stateless/run.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index a030be92506..17e39487e3e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -193,8 +193,8 @@ ORDER BY tuple()" # create minio log webhooks for both audit and server logs # use async inserts to avoid creating too many parts -./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 -./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 +./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 +./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 max_retries=100 retry=1 @@ -376,6 +376,8 @@ done # collect minio audit and server logs +# wait for minio to flush its batch if it has any +sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" From d53513a81a10b8230a30fdbb386aca1d067cbcfa Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 10 Aug 2024 18:12:34 +0000 Subject: [PATCH 105/132] fix --- tests/queries/0_stateless/01710_projection_vertical_merges.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql index 0f80d659e92..0d745e44b10 100644 --- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-parallel +-- set no-parallel tag is to prevent timeout of this test drop table if exists t; From 80e926996319b926f3cbebf2050ed4a60666ee71 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 10 Aug 2024 19:39:59 +0000 Subject: [PATCH 106/132] allow UInt64 <-> Int64 conversion --- src/Core/Field.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 13741183f21..ba8c66580ad 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -882,8 +882,11 @@ auto & Field::safeGet() const Types::Which target = TypeToEnum>>::value; /// bool is stored as uint64, will be returned as UInt64 when requested as bool or UInt64, as Int64 when requested as Int64 - if (target != which && !(which == Field::Types::Bool && (target == Field::Types::UInt64 || target == Field::Types::Int64))) - throw Exception(ErrorCodes::BAD_GET, "Bad get: has {}, requested {}", getTypeName(), target); + /// also allow UInt64 <-> Int64 conversion + if (target != which && + !(which == Field::Types::Bool && (target == Field::Types::UInt64 || target == Field::Types::Int64)) && + !(isInt64OrUInt64FieldType(which) && isInt64OrUInt64FieldType(target))) + throw Exception(ErrorCodes::BAD_GET, "Bad get: has {}, requested {}", getTypeName(), target); return get(); } From 556f66987897bd5065426e187bef4e0cba2a975c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Sat, 10 Aug 2024 21:52:55 +0000 Subject: [PATCH 107/132] Separate test into separate file to pass bugfix check --- tests/integration/parallel_skip.json | 4 + tests/integration/test_storage_kafka/test.py | 135 ---------- .../test_produce_http_interface.py | 243 ++++++++++++++++++ 3 files changed, 247 insertions(+), 135 deletions(-) create mode 100644 tests/integration/test_storage_kafka/test_produce_http_interface.py diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..fca2126d824 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -162,9 +162,13 @@ "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance_mv", "test_storage_kafka/test.py::test_formats_errors", "test_storage_kafka/test.py::test_multiple_read_in_materialized_views", + "test_storage_kafka/test.py::test_kafka_null_message", + + "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc", "test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection" + ] diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9b2f465c1b6..4b6c9922d74 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -5524,141 +5524,6 @@ def test_kafka_null_message(kafka_cluster, create_query_generator): ) -def test_kafka_produce_http_interface_row_based_format(kafka_cluster): - # reproduction of #61060 with validating the written messages - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - topic_prefix = "http_row_" - - # It is important to have: - # - long enough messages - # - enough messages - # I don't know the exact requirement for message sizes, but it doesn't reproduce with short messages - # For the number of messages it seems like at least 3 messages is necessary - expected_key = "01234567890123456789" - expected_value = "aaaaabbbbbccccc" - - insert_query_end = f"(key, value) VALUES ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}')" - insert_query_template = "INSERT INTO {table_name} " + insert_query_end - - extra_settings = { - "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", - "CapnProto": ", kafka_schema='string_key_value:StringKeyValuePair'", - "Template": ", format_template_row='string_key_value.format'", - } - - # Only the formats that can be used both and input and output format are tested - # Reasons to exclude following formats: - # - JSONStrings: not actually an input format - # - ProtobufSingle: I cannot make it work to parse the messages. Probably something is broken, - # because the producer can write multiple rows into a same message, which makes them impossible to parse properly. Should added after #67549 is fixed. - # - ProtobufList: I didn't want to deal with the envelope and stuff - # - Npy: supports only single column - # - LineAsString: supports only single column - # - RawBLOB: supports only single column - formats_to_test = [ - "TabSeparated", - "TabSeparatedRaw", - "TabSeparatedWithNames", - "TabSeparatedWithNamesAndTypes", - "TabSeparatedRawWithNames", - "TabSeparatedRawWithNamesAndTypes", - "Template", - "CSV", - "CSVWithNames", - "CSVWithNamesAndTypes", - "CustomSeparated", - "CustomSeparatedWithNames", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONColumns", - "JSONColumnsWithMetadata", - "JSONCompact", - "JSONCompactColumns", - "JSONEachRow", - "JSONStringsEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNames", - "JSONCompactEachRowWithNamesAndTypes", - "JSONCompactStringsEachRow", - "JSONCompactStringsEachRowWithNames", - "JSONCompactStringsEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "BSONEachRow", - "TSKV", - "Protobuf", - "Avro", - "Parquet", - "Arrow", - "ArrowStream", - "ORC", - "RowBinary", - "RowBinaryWithNames", - "RowBinaryWithNamesAndTypes", - "Native", - "CapnProto", - "MsgPack", - ] - for format in formats_to_test: - logging.debug(f"Creating tables and writing messages to {format}") - topic = topic_prefix + format - kafka_create_topic(admin_client, topic) - - extra_setting = extra_settings.get(format, "") - - # kafka_max_rows_per_message is set to 2 to make sure every format produces at least 2 messages, thus increasing the chance of catching a bug - instance.query( - f""" - DROP TABLE IF EXISTS test.view_{topic}; - DROP TABLE IF EXISTS test.consumer_{topic}; - CREATE TABLE test.kafka_writer_{topic} (key String, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_format = '{format}', - kafka_max_rows_per_message = 2 {extra_setting}; - - CREATE TABLE test.kafka_{topic} (key String, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_format = '{format}' {extra_setting}; - - CREATE MATERIALIZED VIEW test.view_{topic} Engine=Log AS - SELECT key, value FROM test.kafka_{topic}; - """ - ) - - instance.http_query( - insert_query_template.format(table_name="test.kafka_writer_" + topic), - method="POST", - ) - - expected = f"""\ -{expected_key}\t{expected_value} -{expected_key}\t{expected_value} -{expected_key}\t{expected_value} -""" - # give some times for the readers to read the messages - for format in formats_to_test: - logging.debug(f"Checking result for {format}") - topic = topic_prefix + format - - result = instance.query_with_retry( - f"SELECT * FROM test.view_{topic}", - check_callback=lambda res: res.count("\n") == 3, - ) - - assert TSV(result) == TSV(expected) - - kafka_delete_topic(admin_client, topic) - - if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kafka/test_produce_http_interface.py b/tests/integration/test_storage_kafka/test_produce_http_interface.py new file mode 100644 index 00000000000..fc10a07f239 --- /dev/null +++ b/tests/integration/test_storage_kafka/test_produce_http_interface.py @@ -0,0 +1,243 @@ +import time +import logging + +import pytest +from helpers.cluster import ClickHouseCluster, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient +from kafka.admin import NewTopic + +if is_arm(): + pytestmark = pytest.mark.skip + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kafka.xml", "configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_kafka=True, + with_zookeeper=True, # For Replicated Table + macros={ + "kafka_broker": "kafka1", + "kafka_topic_old": "old", + "kafka_group_name_old": "old", + "kafka_topic_new": "new", + "kafka_group_name_new": "new", + "kafka_client_id": "instance", + "kafka_format_json_each_row": "JSONEachRow", + }, + clickhouse_path_dir="clickhouse_path", +) + + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + cluster.start() + kafka_id = instance.cluster.kafka_docker_id + print(("kafka_id is {}".format(kafka_id))) + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kafka_setup_teardown(): + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + # logging.debug("kafka is available - running test") + yield # run test + + +def kafka_create_topic( + admin_client, + topic_name, + num_partitions=1, + replication_factor=1, + max_retries=50, + config=None, +): + logging.debug( + f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" + ) + topics_list = [ + NewTopic( + name=topic_name, + num_partitions=num_partitions, + replication_factor=replication_factor, + topic_configs=config, + ) + ] + retries = 0 + while True: + try: + admin_client.create_topics(new_topics=topics_list, validate_only=False) + logging.debug("Admin client succeed") + return + except Exception as e: + retries += 1 + time.sleep(0.5) + if retries < max_retries: + logging.warning(f"Failed to create topic {e}") + else: + raise + + +def kafka_delete_topic(admin_client, topic, max_retries=50): + result = admin_client.delete_topics([topic]) + for topic, e in result.topic_error_codes: + if e == 0: + logging.debug(f"Topic {topic} deleted") + else: + logging.error(f"Failed to delete topic {topic}: {e}") + + retries = 0 + while True: + topics_listed = admin_client.list_topics() + logging.debug(f"TOPICS LISTED: {topics_listed}") + if topic not in topics_listed: + return + else: + retries += 1 + time.sleep(0.5) + if retries > max_retries: + raise Exception(f"Failed to delete topics {topic}, {result}") + + +def test_kafka_produce_http_interface_row_based_format(kafka_cluster): + # reproduction of #61060 with validating the written messages + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic_prefix = "http_row_" + + # It is important to have: + # - long enough messages + # - enough messages + # I don't know the exact requirement for message sizes, but it doesn't reproduce with short messages + # For the number of messages it seems like at least 3 messages is necessary + expected_key = "01234567890123456789" + expected_value = "aaaaabbbbbccccc" + + insert_query_end = f"(key, value) VALUES ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}')" + insert_query_template = "INSERT INTO {table_name} " + insert_query_end + + extra_settings = { + "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", + "CapnProto": ", kafka_schema='string_key_value:StringKeyValuePair'", + "Template": ", format_template_row='string_key_value.format'", + } + + # Only the formats that can be used both and input and output format are tested + # Reasons to exclude following formats: + # - JSONStrings: not actually an input format + # - ProtobufSingle: I cannot make it work to parse the messages. Probably something is broken, + # because the producer can write multiple rows into a same message, which makes them impossible to parse properly. Should added after #67549 is fixed. + # - ProtobufList: I didn't want to deal with the envelope and stuff + # - Npy: supports only single column + # - LineAsString: supports only single column + # - RawBLOB: supports only single column + formats_to_test = [ + "TabSeparated", + "TabSeparatedRaw", + "TabSeparatedWithNames", + "TabSeparatedWithNamesAndTypes", + "TabSeparatedRawWithNames", + "TabSeparatedRawWithNamesAndTypes", + "Template", + "CSV", + "CSVWithNames", + "CSVWithNamesAndTypes", + "CustomSeparated", + "CustomSeparatedWithNames", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONColumns", + "JSONColumnsWithMetadata", + "JSONCompact", + "JSONCompactColumns", + "JSONEachRow", + "JSONStringsEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNames", + "JSONCompactEachRowWithNamesAndTypes", + "JSONCompactStringsEachRow", + "JSONCompactStringsEachRowWithNames", + "JSONCompactStringsEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "BSONEachRow", + "TSKV", + "Protobuf", + "Avro", + "Parquet", + "Arrow", + "ArrowStream", + "ORC", + "RowBinary", + "RowBinaryWithNames", + "RowBinaryWithNamesAndTypes", + "Native", + "CapnProto", + "MsgPack", + ] + for format in formats_to_test: + logging.debug(f"Creating tables and writing messages to {format}") + topic = topic_prefix + format + kafka_create_topic(admin_client, topic) + + extra_setting = extra_settings.get(format, "") + + # kafka_max_rows_per_message is set to 2 to make sure every format produces at least 2 messages, thus increasing the chance of catching a bug + instance.query( + f""" + DROP TABLE IF EXISTS test.view_{topic}; + DROP TABLE IF EXISTS test.consumer_{topic}; + CREATE TABLE test.kafka_writer_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}', + kafka_max_rows_per_message = 2 {extra_setting}; + + CREATE TABLE test.kafka_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}' {extra_setting}; + + CREATE MATERIALIZED VIEW test.view_{topic} Engine=Log AS + SELECT key, value FROM test.kafka_{topic}; + """ + ) + instance.http_query( + insert_query_template.format(table_name="test.kafka_writer_" + topic), + method="POST", + ) + + expected = f"""\ +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +""" + # give some times for the readers to read the messages + for format in formats_to_test: + logging.debug(f"Checking result for {format}") + topic = topic_prefix + format + + result = instance.query_with_retry( + f"SELECT * FROM test.view_{topic}", + check_callback=lambda res: res.count("\n") == 3, + ) + + assert TSV(result) == TSV(expected) + + kafka_delete_topic(admin_client, topic) + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() From 613ebe367c1f811eea38d7c5e778cedddbfb0ce7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Sat, 10 Aug 2024 22:05:11 +0000 Subject: [PATCH 108/132] Only add extra cell when necessary --- tests/ci/report.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 15b1512896a..6779a6dae96 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -738,7 +738,7 @@ def create_test_html_report( if test_results: rows_part = [] num_fails = 0 - has_test_time = False + has_test_time = any(tr.time is not None for tr in test_results) has_log_urls = False # Display entires with logs at the top (they correspond to failed tests) @@ -770,12 +770,12 @@ def create_test_html_report( row.append(f'{test_result.status}') colspan += 1 - row.append("") - if test_result.time is not None: - has_test_time = True - row.append(str(test_result.time)) - row.append("") - colspan += 1 + if has_test_time: + if test_result.time is not None: + row.append(f"{test_result.time}") + else: + row.append("") + colspan += 1 if test_result.log_urls is not None: has_log_urls = True From e93584e741fab888977413c800df4595220e7552 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 10 Aug 2024 23:02:30 +0000 Subject: [PATCH 109/132] fix Field conversion to IPv4 --- src/Interpreters/convertFieldToType.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 3489852bbd5..738c51baa64 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -304,8 +304,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isIPv4() && src.getType() == Field::Types::UInt64) { - /// convert to UInt32 which is the underlying type for native IPv4 - return convertNumericType(src, type); + /// convert through UInt32 which is the underlying type for native IPv4 + return static_cast(convertNumericType(src, type).safeGet()); } } else if (which_type.isUUID() && src.getType() == Field::Types::UUID) From 1142305b113e261d0c8910c0b622ba94727fe78d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 10 Aug 2024 09:53:43 +0200 Subject: [PATCH 110/132] tests: fix 01246_buffer_flush flakiness due to slow trace_log flush Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/01246_buffer_flush.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh index aea91a0bf6b..3c7b9038e1f 100755 --- a/tests/queries/0_stateless/01246_buffer_flush.sh +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -27,7 +27,7 @@ function wait_until() function get_buffer_delay() { local buffer_insert_id=$1 && shift - query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" query " WITH (SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_, From 45028620332d55391e900b6c7e75acb34df1d98c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 11 Aug 2024 08:35:47 +0000 Subject: [PATCH 111/132] Fix no-SSE3 build --- contrib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index eb3afe0ccdf..b33e7083e32 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -179,7 +179,7 @@ else() message(STATUS "Not using QPL") endif () -if (OS_LINUX AND ARCH_AMD64) +if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QATLIB) message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") From 53bc1b7e3539cde14cb34f26af296bde5c29449e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 11 Aug 2024 13:19:36 +0200 Subject: [PATCH 112/132] Revert "Refactor tests for (experimental) statistics" --- docs/en/development/tests.md | 4 +- .../statements/alter/statistics.md | 16 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- ...2864_statistics_count_min_sketch.reference | 14 ++ .../02864_statistics_count_min_sketch.sql | 70 ++++++ .../02864_statistics_ddl.reference | 37 ++- .../0_stateless/02864_statistics_ddl.sql | 234 ++++-------------- ...delayed_materialization_in_merge.reference | 12 - ...stics_delayed_materialization_in_merge.sql | 36 --- .../02864_statistics_exception.reference | 0 .../02864_statistics_exception.sql | 55 ++++ ..._statistics_materialize_in_merge.reference | 10 + .../02864_statistics_materialize_in_merge.sql | 52 ++++ .../02864_statistics_predicates.reference | 98 -------- .../02864_statistics_predicates.sql | 214 ---------------- .../02864_statistics_uniq.reference | 35 +++ .../0_stateless/02864_statistics_uniq.sql | 73 ++++++ .../02864_statistics_usage.reference | 20 -- .../0_stateless/02864_statistics_usage.sql | 42 ---- 19 files changed, 399 insertions(+), 625 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.reference create mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql create mode 100644 tests/queries/0_stateless/02864_statistics_exception.reference create mode 100644 tests/queries/0_stateless/02864_statistics_exception.sql create mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference create mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_predicates.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_predicates.sql create mode 100644 tests/queries/0_stateless/02864_statistics_uniq.reference create mode 100644 tests/queries/0_stateless/02864_statistics_uniq.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_usage.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_usage.sql diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 6cb36e2049b..269995a1a96 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. :::note A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs @@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes ### Adding a New Test -To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 7a1774a01b5..6880cef0e5c 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -8,28 +8,26 @@ sidebar_label: STATISTICS The following operations are available: -- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds statistic description to tables metadata. +- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata. -- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies statistic description to tables metadata. +- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata. -- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. +- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. -- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. +- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. -- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). The first two commands are lightweight in a sense that they only change metadata or remove files. Also, they are replicated, syncing statistics metadata via ZooKeeper. -## Example: - -Adding two statistics types to two columns: +There is an example adding two statistics types to two columns: ``` ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` :::note -Statistic are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c7101021f02..625b1281c61 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3517,7 +3517,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context const auto & new_column = new_metadata.getColumns().get(command.column_name); if (!old_column.type->equals(*new_column.type)) throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, - "ALTER types of column {} with statistics is not safe " + "ALTER types of column {} with statistics is not not safe " "because it can change the representation of statistics", backQuoteIfNeed(command.column_name)); } diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference new file mode 100644 index 00000000000..02c41656a36 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference @@ -0,0 +1,14 @@ +CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +Test statistics count_min: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) +Test statistics multi-types: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) +Test LowCardinality and Nullable data type: +tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql new file mode 100644 index 00000000000..c730aa7b4a7 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql @@ -0,0 +1,70 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS tab SYNC; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET allow_suspicious_low_cardinality_types=1; +SET mutations_sync = 2; + +CREATE TABLE tab +( + a String, + b UInt64, + c Int64, + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE tab; + +INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'Test statistics count_min:'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab ADD STATISTICS b TYPE count_min; +ALTER TABLE tab ADD STATISTICS c TYPE count_min; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS a, b, c; + + +SELECT 'Test statistics multi-types:'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS a, b, c; + +DROP TABLE IF EXISTS tab SYNC; + + +SELECT 'Test LowCardinality and Nullable data type:'; +DROP TABLE IF EXISTS tab2 SYNC; +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE tab2 +( + a LowCardinality(Int64) STATISTICS(count_min), + b Nullable(Int64) STATISTICS(count_min), + c LowCardinality(Nullable(Int64)) STATISTICS(count_min), + pk String, +) Engine = MergeTree() ORDER BY pk; + +select name from system.tables where name = 'tab2' and database = currentDatabase(); + +DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference index 0e453b0ee8a..a7ff5caa0b0 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.reference +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -1,6 +1,31 @@ -CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +10 +0 +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) +10 +CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After add statistic +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) +20 +CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) +20 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index 32b56a842b7..fe612efe2ac 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -1,195 +1,59 @@ --- Tags: no-fasttest --- no-fasttest: 'count_min' sketches need a 3rd party library - --- Tests that DDL statements which create / drop / materialize statistics - -SET mutations_sync = 1; +-- Tests that various DDL statements create/drop/materialize statistics DROP TABLE IF EXISTS tab; --- Error case: Can't create statistics when allow_experimental_statistics = 0 -CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - SET allow_experimental_statistics = 1; - --- Error case: Unknown statistics types are rejected -CREATE TABLE tab (col Float64 STATISTICS(no_statistics_type)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- Error case: The same statistics type can't exist more than once on a column -CREATE TABLE tab (col Float64 STATISTICS(tdigest, tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - -SET allow_suspicious_low_cardinality_types = 1; - --- Statistics can only be created on columns of specific data types (depending on the statistics kind), (*) - --- tdigest requires data_type.isValueRepresentedByInteger --- These types work: -CREATE TABLE tab (col UInt8 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col UInt256 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Float32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Decimal32(3) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col IPv4 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Nullable(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; --- These types don't work: -CREATE TABLE tab (col String STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col FixedString(1) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Array(Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - --- uniq requires data_type.isValueRepresentedByInteger --- These types work: -CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Float32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Decimal32(3) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime64 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; --- These types don't work: -CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - --- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String --- These types work: -CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; --- These types don't work: -CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - --- CREATE TABLE was easy, ALTER is more fun +SET allow_statistics_optimize = 1; CREATE TABLE tab ( - f64 Float64, - f64_tdigest Float64 STATISTICS(tdigest), - f32 Float32, - s String, - a Array(Float64) -) -Engine = MergeTree() -ORDER BY tuple(); + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; --- Error case: Unknown statistics types are rejected --- (relevant for ADD and MODIFY) -ALTER TABLE tab ADD STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab MODIFY STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } --- for some reason, ALTER TABLE tab MODIFY STATISTICS IF EXISTS is not supported - --- Error case: The same statistics type can't exist more than once on a column --- (relevant for ADD and MODIFY) --- Create the same statistics object twice -ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } --- Create an statistics which exists already -ALTER TABLE tab ADD STATISTICS f64_tdigest TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64_tdigest TYPE tdigest; -- no-op -ALTER TABLE tab MODIFY STATISTICS f64_tdigest TYPE tdigest; -- no-op - --- Error case: Column does not exist --- (relevant for ADD, MODIFY, DROP, CLEAR, and MATERIALIZE) --- Note that the results are unfortunately quite inconsistent ... -ALTER TABLE tab ADD STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS IF EXISTS no_such_column; -- no-op -ALTER TABLE tab CLEAR STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab CLEAR STATISTICS IF EXISTS no_such_column; -- no-op -ALTER TABLE tab MATERIALIZE STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS no_such_column; -- { serverError ILLEGAL_STATISTICS } - --- Error case: Column exists but has no statistics --- (relevant for MODIFY, DROP, CLEAR, and MATERIALIZE) --- Note that the results are unfortunately quite inconsistent ... -ALTER TABLE tab MODIFY STATISTICS s TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS s; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS IF EXISTS s; -- no-op -ALTER TABLE tab CLEAR STATISTICS s; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab CLEAR STATISTICS IF EXISTS s; -- no-op -ALTER TABLE tab MATERIALIZE STATISTICS s; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS s; -- { serverError ILLEGAL_STATISTICS } - --- We don't check systematically that that statistics can only be created via ALTER ADD STATISTICS on columns of specific data types (the --- internal type validation code is tested already above, (*)). Only do a rudimentary check for each statistics type with a data type that --- works and one that doesn't work. --- tdigest --- Works: -ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; -ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; --- Doesn't work: -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } --- uniq --- Works: -ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64; -ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; --- Doesn't work: -ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } --- count_min --- Works: -ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; -ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; --- Doesn't work: -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } - --- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing --- statistics objects (e.g. tdigest can be created on Float64 and UInt64) -ALTER TABLE tab MODIFY COLUMN f64_tdigest UInt64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } - --- Finally, do a full-circle test of a good case. Print table definition after each step. --- Intentionally specifying _two_ columns and _two_ statistics types to have that also tested. -SHOW CREATE TABLE tab; -ALTER TABLE tab ADD STATISTICS f64, f32 TYPE tdigest, uniq; -SHOW CREATE TABLE tab; -ALTER TABLE tab MODIFY STATISTICS f64, f32 TYPE tdigest, uniq; -SHOW CREATE TABLE tab; -ALTER TABLE tab CLEAR STATISTICS f64, f32; -SHOW CREATE TABLE tab; -ALTER TABLE tab MATERIALIZE STATISTICS f64, f32; -SHOW CREATE TABLE tab; -ALTER TABLE tab DROP STATISTICS f64, f32; SHOW CREATE TABLE tab; -DROP TABLE tab; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; +SELECT count(*) FROM tab WHERE b < NULL and a < '10'; + +ALTER TABLE tab DROP STATISTICS a, b; + +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; + +SELECT 'After add statistic'; + +SHOW CREATE TABLE tab; + +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'After materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +OPTIMIZE TABLE tab FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE b < 10 and a < 10; + +ALTER TABLE tab RENAME COLUMN b TO c; +SHOW CREATE TABLE tab; + +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT count(*) FROM tab WHERE c < 10 and a < 10; + +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference deleted file mode 100644 index eb5e685597c..00000000000 --- a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference +++ /dev/null @@ -1,12 +0,0 @@ -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) -After truncate, insert, and materialize - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql deleted file mode 100644 index d469a4c2036..00000000000 --- a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql +++ /dev/null @@ -1,36 +0,0 @@ --- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). --- (The concrete statistics type, column data type and predicate type don't matter) - --- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) - -DROP TABLE IF EXISTS tab; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET enable_analyzer = 1; - -SET materialize_statistics_on_insert = 0; - -CREATE TABLE tab -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) - -OPTIMIZE TABLE tab FINAL; -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) - -TRUNCATE TABLE tab; -SET mutations_sync = 2; -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE tab MATERIALIZE STATISTICS a, b; -SELECT 'After truncate, insert, and materialize'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.reference b/tests/queries/0_stateless/02864_statistics_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql new file mode 100644 index 00000000000..289ffee6600 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_exception.sql @@ -0,0 +1,55 @@ +-- Tests creating/dropping/materializing statistics produces the right exceptions. + +DROP TABLE IF EXISTS tab; + +-- Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +SET allow_experimental_statistics = 1; + +-- The same type of statistics can't exist more than once on a column +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest, tdigest) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +-- Unknown statistics types are rejected +CREATE TABLE tab +( + a Float64 STATISTICS(no_statistics_type) +) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +-- tDigest statistics can only be created on numeric columns +CREATE TABLE tab +( + a String STATISTICS(tdigest), +) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +CREATE TABLE tab +( + a Float64, + b String +) Engine = MergeTree() ORDER BY tuple(); + +ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; +-- Statistics can be created only on integer columns +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS a; +ALTER TABLE tab DROP STATISTICS IF EXISTS a; +ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; +ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } + +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; +ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; +ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference new file mode 100644 index 00000000000..5e969cf41cb --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference @@ -0,0 +1,10 @@ +10 +10 +10 +statistics not used Condition less(b, 10_UInt8) moved to PREWHERE +statistics not used Condition less(a, 10_UInt8) moved to PREWHERE +statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE +statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE +statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE +statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE +2 0 diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql new file mode 100644 index 00000000000..6606cff263f --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql @@ -0,0 +1,52 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). + +DROP TABLE IF EXISTS tab; + +SET enable_analyzer = 1; +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; + +OPTIMIZE TABLE tab FINAL; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; + +TRUNCATE TABLE tab; +SET mutations_sync = 2; + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; + +SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; + +DROP TABLE tab; + +SYSTEM FLUSH LOGS; + +SELECT log_comment, message FROM system.text_log JOIN +( + SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log + WHERE current_database = currentDatabase() + AND query LIKE 'SELECT count(*) FROM tab%' + AND type = 'QueryFinish' +) AS query_log USING (query_id) +WHERE message LIKE '%moved to PREWHERE%' +ORDER BY event_time_microseconds; + +SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) +FROM system.query_log +WHERE current_database = currentDatabase() + AND query LIKE 'INSERT INTO tab SELECT%' + AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference deleted file mode 100644 index ffbd7269e05..00000000000 --- a/tests/queries/0_stateless/02864_statistics_predicates.reference +++ /dev/null @@ -1,98 +0,0 @@ -u64 and = -10 -10 -10 -10 -0 -0 -0 -0 -10 -10 -10 -10 -u64 and < -70 -70 -70 -70 -80 -80 -80 -80 -70 -70 -70 -70 -f64 and = -10 -10 -10 -10 -0 -0 -0 -0 -10 -10 -10 -10 -0 -0 -0 -0 -f64 and < -70 -70 -70 -70 -80 -80 -80 -80 -70 -70 -70 -70 -80 -80 -80 -80 -dt and = -0 -0 -0 -0 -10 -10 -10 -10 -dt and < -10000 -10000 -10000 -10000 -70 -70 -70 -70 -b and = -5000 -5000 -5000 -5000 -5000 -5000 -5000 -5000 -5000 -5000 -5000 -5000 -0 -0 -0 -0 -s and = -10 -10 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql deleted file mode 100644 index 779116cf19a..00000000000 --- a/tests/queries/0_stateless/02864_statistics_predicates.sql +++ /dev/null @@ -1,214 +0,0 @@ --- Tags: no-fasttest --- no-fasttest: 'count_min' sketches need a 3rd party library - --- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types. - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -DROP TABLE IF EXISTS tab; - -CREATE TABLE tab -( - u64 UInt64, - u64_tdigest UInt64 STATISTICS(tdigest), - u64_count_min UInt64 STATISTICS(count_min), - u64_uniq UInt64 STATISTICS(uniq), - f64 Float64, - f64_tdigest Float64 STATISTICS(tdigest), - f64_count_min Float64 STATISTICS(count_min), - f64_uniq Float64 STATISTICS(uniq), - dt DateTime, - dt_tdigest DateTime STATISTICS(tdigest), - dt_count_min DateTime STATISTICS(count_min), - dt_uniq DateTime STATISTICS(uniq), - b Bool, - b_tdigest Bool STATISTICS(tdigest), - b_count_min Bool STATISTICS(count_min), - b_uniq Bool STATISTICS(uniq), - s String, - -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest - s_count_min String STATISTICS(count_min) - -- s_uniq String STATISTICS(uniq), -- not supported by uniq -) Engine = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0; - -INSERT INTO tab --- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000; -SELECT number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 1000, - number % 2, - number % 2, - number % 2, - number % 2, - toString(number % 1000), - toString(number % 1000) -FROM system.numbers LIMIT 10000; - --- u64 ---------------------------------------------------- - -SELECT 'u64 and ='; - -SELECT count(*) FROM tab WHERE u64 = 7; -SELECT count(*) FROM tab WHERE u64_tdigest = 7; -SELECT count(*) FROM tab WHERE u64_count_min = 7; -SELECT count(*) FROM tab WHERE u64_uniq = 7; - -SELECT count(*) FROM tab WHERE u64 = 7.7; -SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; -SELECT count(*) FROM tab WHERE u64_count_min = 7.7; -SELECT count(*) FROM tab WHERE u64_uniq = 7.7; - -SELECT count(*) FROM tab WHERE u64 = '7'; -SELECT count(*) FROM tab WHERE u64_tdigest = '7'; -SELECT count(*) FROM tab WHERE u64_count_min = '7'; -SELECT count(*) FROM tab WHERE u64_uniq = '7'; - -SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } - -SELECT 'u64 and <'; - -SELECT count(*) FROM tab WHERE u64 < 7; -SELECT count(*) FROM tab WHERE u64_tdigest < 7; -SELECT count(*) FROM tab WHERE u64_count_min < 7; -SELECT count(*) FROM tab WHERE u64_uniq < 7; - -SELECT count(*) FROM tab WHERE u64 < 7.7; -SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; -SELECT count(*) FROM tab WHERE u64_count_min < 7.7; -SELECT count(*) FROM tab WHERE u64_uniq < 7.7; - -SELECT count(*) FROM tab WHERE u64 < '7'; -SELECT count(*) FROM tab WHERE u64_tdigest < '7'; -SELECT count(*) FROM tab WHERE u64_count_min < '7'; -SELECT count(*) FROM tab WHERE u64_uniq < '7'; - -SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } - --- f64 ---------------------------------------------------- - -SELECT 'f64 and ='; - -SELECT count(*) FROM tab WHERE f64 = 7; -SELECT count(*) FROM tab WHERE f64_tdigest = 7; -SELECT count(*) FROM tab WHERE f64_count_min = 7; -SELECT count(*) FROM tab WHERE f64_uniq = 7; - -SELECT count(*) FROM tab WHERE f64 = 7.7; -SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; -SELECT count(*) FROM tab WHERE f64_count_min = 7.7; -SELECT count(*) FROM tab WHERE f64_uniq = 7.7; - -SELECT count(*) FROM tab WHERE f64 = '7'; -SELECT count(*) FROM tab WHERE f64_tdigest = '7'; -SELECT count(*) FROM tab WHERE f64_count_min = '7'; -SELECT count(*) FROM tab WHERE f64_uniq = '7'; - -SELECT count(*) FROM tab WHERE f64 = '7.7'; -SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; -SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; -SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; - -SELECT 'f64 and <'; - -SELECT count(*) FROM tab WHERE f64 < 7; -SELECT count(*) FROM tab WHERE f64_tdigest < 7; -SELECT count(*) FROM tab WHERE f64_count_min < 7; -SELECT count(*) FROM tab WHERE f64_uniq < 7; - -SELECT count(*) FROM tab WHERE f64 < 7.7; -SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; -SELECT count(*) FROM tab WHERE f64_count_min < 7.7; -SELECT count(*) FROM tab WHERE f64_uniq < 7.7; - -SELECT count(*) FROM tab WHERE f64 < '7'; -SELECT count(*) FROM tab WHERE f64_tdigest < '7'; -SELECT count(*) FROM tab WHERE f64_count_min < '7'; -SELECT count(*) FROM tab WHERE f64_uniq < '7'; - -SELECT count(*) FROM tab WHERE f64 < '7.7'; -SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; -SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; -SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; - --- dt ---------------------------------------------------- - -SELECT 'dt and ='; - -SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; - -SELECT count(*) FROM tab WHERE dt = 7; -SELECT count(*) FROM tab WHERE dt_tdigest = 7; -SELECT count(*) FROM tab WHERE dt_count_min = 7; -SELECT count(*) FROM tab WHERE dt_uniq = 7; - -SELECT 'dt and <'; - -SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; -SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; - -SELECT count(*) FROM tab WHERE dt < 7; -SELECT count(*) FROM tab WHERE dt_tdigest < 7; -SELECT count(*) FROM tab WHERE dt_count_min < 7; -SELECT count(*) FROM tab WHERE dt_uniq < 7; - --- b ---------------------------------------------------- - -SELECT 'b and ='; - -SELECT count(*) FROM tab WHERE b = true; -SELECT count(*) FROM tab WHERE b_tdigest = true; -SELECT count(*) FROM tab WHERE b_count_min = true; -SELECT count(*) FROM tab WHERE b_uniq = true; - -SELECT count(*) FROM tab WHERE b = 'true'; -SELECT count(*) FROM tab WHERE b_tdigest = 'true'; -SELECT count(*) FROM tab WHERE b_count_min = 'true'; -SELECT count(*) FROM tab WHERE b_uniq = 'true'; - -SELECT count(*) FROM tab WHERE b = 1; -SELECT count(*) FROM tab WHERE b_tdigest = 1; -SELECT count(*) FROM tab WHERE b_count_min = 1; -SELECT count(*) FROM tab WHERE b_uniq = 1; - -SELECT count(*) FROM tab WHERE b = 1.1; -SELECT count(*) FROM tab WHERE b_tdigest = 1.1; -SELECT count(*) FROM tab WHERE b_count_min = 1.1; -SELECT count(*) FROM tab WHERE b_uniq = 1.1; - --- s ---------------------------------------------------- - -SELECT 's and ='; - -SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } --- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported -SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } --- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported - -SELECT count(*) FROM tab WHERE s = '7'; --- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported -SELECT count(*) FROM tab WHERE s_count_min = '7'; --- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.reference b/tests/queries/0_stateless/02864_statistics_uniq.reference new file mode 100644 index 00000000000..77786dbdd8c --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_uniq.reference @@ -0,0 +1,35 @@ +CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) +After modify TDigest + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed) +After drop + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql new file mode 100644 index 00000000000..0f5f353c045 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_uniq.sql @@ -0,0 +1,73 @@ +DROP TABLE IF EXISTS t1; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET mutations_sync = 1; + +CREATE TABLE t1 +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + c Int64 STATISTICS(tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; + +SHOW CREATE TABLE t1; + +INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; +INSERT INTO t1 select 0, 0, 11, generateUUIDv4(); + +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +OPTIMIZE TABLE t1 FINAL; + +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +SELECT 'After modify TDigest'; +ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest; +ALTER TABLE t1 MATERIALIZE STATISTICS c; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + + +ALTER TABLE t1 DROP STATISTICS c; + +SELECT 'After drop'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +SET allow_suspicious_low_cardinality_types=1; +CREATE TABLE t2 +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + c LowCardinality(Int64) STATISTICS(tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; + +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t3 +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest), + c Nullable(Int64) STATISTICS(tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk +SETTINGS min_bytes_for_wide_part = 0; +INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; + +DROP TABLE IF EXISTS t3; + diff --git a/tests/queries/0_stateless/02864_statistics_usage.reference b/tests/queries/0_stateless/02864_statistics_usage.reference deleted file mode 100644 index a9f669b88c1..00000000000 --- a/tests/queries/0_stateless/02864_statistics_usage.reference +++ /dev/null @@ -1,20 +0,0 @@ -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) -After add and materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_usage.sql b/tests/queries/0_stateless/02864_statistics_usage.sql deleted file mode 100644 index 4956bd27e87..00000000000 --- a/tests/queries/0_stateless/02864_statistics_usage.sql +++ /dev/null @@ -1,42 +0,0 @@ --- Test that the optimizer picks up column statistics --- (The concrete statistics type, column data type and predicate type don't matter) - --- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET mutations_sync = 1; -SET enable_analyzer = 1; - -DROP TABLE IF EXISTS tab; - -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest) -) Engine = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0; - -INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) - -ALTER TABLE tab DROP STATISTICS a, b; -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) - -ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b; -INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; -SELECT 'After add and materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) - -OPTIMIZE TABLE tab FINAL; -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) - -ALTER TABLE tab RENAME COLUMN b TO c; -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then c (statistics used) - -DROP TABLE IF EXISTS tab; From 29afd2de785450f2e7f5faec1dc6b35e166cefb4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 13:26:45 +0200 Subject: [PATCH 113/132] Remove "Processing configuration file" message from clickhouse-local Make the behaviour identical to the clickhouse-client Signed-off-by: Azat Khuzhin --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6b0b8fc5b50..200beea7b63 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -143,7 +143,7 @@ void LocalServer::initialize(Poco::Util::Application & self) if (fs::exists(config_path)) { - ConfigProcessor config_processor(config_path, false, true); + ConfigProcessor config_processor(config_path); ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); From 4fec61da55c1032f274da87198af59c78cd0d87e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Aug 2024 12:35:27 +0000 Subject: [PATCH 114/132] fix wrong datatype in system.kafka_consumers --- src/Storages/System/StorageSystemKafkaConsumers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 86713632339..5e790587716 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -79,7 +79,7 @@ void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, Context auto & num_rebalance_revocations = assert_cast(*res_columns[index++]); auto & num_rebalance_assigments = assert_cast(*res_columns[index++]); auto & is_currently_used = assert_cast(*res_columns[index++]); - auto & last_used = assert_cast(*res_columns[index++]); + auto & last_used = assert_cast(*res_columns[index++]); auto & rdkafka_stat = assert_cast(*res_columns[index++]); const auto access = context->getAccess(); From 4ef3fe416debecefcea4d7336aac7c679092cf0c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 11 Aug 2024 13:08:53 +0000 Subject: [PATCH 115/132] Fix and simplify test --- .../02496_remove_redundant_sorting.reference | 68 ++++++++---------- .../02496_remove_redundant_sorting.sh | 43 ++++++----- ...emove_redundant_sorting_analyzer.reference | 71 ++++++++----------- 3 files changed, 82 insertions(+), 100 deletions(-) diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index a0a1fd60812..4d004f2f78f 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -465,6 +465,37 @@ Expression ((Projection + Before ORDER BY)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Projection + Before ORDER BY)) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query @@ -527,40 +558,3 @@ Expression (Projection) 2 4 1 3 0 2 --- presence of an inner OFFSET retains the ORDER BY --- query -WITH - t1 AS ( - SELECT SUM(a) AS a, b - FROM - VALUES ( - 'b UInt32, a Int32', - (1, 1), - (2, 0) - ) - GROUP BY 2 - ) -SELECT - SUM(a) -FROM ( - SELECT a, b - FROM t1 - ORDER BY 1 DESC, 2 - OFFSET 1 -) t2 --- explain -Expression (Projection) - Expression (Before ORDER BY) - Aggregating - Expression (Before GROUP BY) - Offset - Expression (Projection) - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - Expression (Projection) - Expression (Before ORDER BY) - Aggregating - Expression (Before GROUP BY) - ReadFromStorage (Values) --- execute -0 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index d59b4387101..c9bd242e429 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -302,6 +302,27 @@ FROM )" run_query "$query" +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2" +run_query "$query" + echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" @@ -329,25 +350,3 @@ FROM ORDER BY number DESC )" run_query "$query" - -echo "-- presence of an inner OFFSET retains the ORDER BY" -query="WITH - t1 AS ( - SELECT SUM(a) AS a, b - FROM - VALUES ( - 'b UInt32, a Int32', - (1, 1), - (2, 0) - ) - GROUP BY 2 - ) -SELECT - SUM(a) -FROM ( - SELECT a, b - FROM t1 - ORDER BY 1 DESC, 2 - OFFSET 1 -) t2" -run_query "$query" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index 58441de5f22..dd5ac7bf706 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -464,6 +464,36 @@ Expression ((Project names + Projection)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Offset + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query @@ -533,44 +563,3 @@ Expression (Project names) 2 4 1 3 0 2 --- presence of an inner OFFSET retains the ORDER BY --- query -WITH - t1 AS ( - SELECT SUM(a) AS a, b - FROM - VALUES ( - 'b UInt32, a Int32', - (1, 1), - (2, 0) - ) - GROUP BY 2 - ) -SELECT - SUM(a) -FROM ( - SELECT a, b - FROM t1 - ORDER BY 1 DESC, 2 - OFFSET 1 -) t2 --- explain -Expression (Project names) - Expression (Projection) - Aggregating - Expression (Before GROUP BY) - Expression (Change column names to column identifiers) - Expression (Project names) - Offset - Sorting (Sorting for ORDER BY) - Expression (Before ORDER BY) - Expression (Projection) - Expression (Change column names to column identifiers) - Expression (Project names) - Expression (Projection) - Aggregating - Expression (Before GROUP BY) - Expression (Change column names to column identifiers) - ReadFromStorage (Values) --- execute -0 From 8e706265e6df3653de76224bfc050b4f52e49282 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Aug 2024 16:29:35 +0000 Subject: [PATCH 116/132] fix --- src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 6a0522b0676..3ca7e8183f1 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -125,7 +125,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d { Tuple new_tuple; - for (const auto & child : tuple) + for (auto & child : tuple) if (shardContains(child, name, data)) new_tuple.emplace_back(std::move(child)); From d314e5aa45fb8ac91324721ab278185b09437a40 Mon Sep 17 00:00:00 2001 From: Vladimir Varankin Date: Sun, 11 Aug 2024 18:37:29 +0200 Subject: [PATCH 117/132] typos in prometheus.md --- docs/en/interfaces/prometheus.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/interfaces/prometheus.md b/docs/en/interfaces/prometheus.md index bf541901b34..8e7023cc51f 100644 --- a/docs/en/interfaces/prometheus.md +++ b/docs/en/interfaces/prometheus.md @@ -75,7 +75,7 @@ Data are received by this protocol and written to a [TimeSeries](/en/engines/tab /write - remote_writeremote_write db_name time_series_table
@@ -105,7 +105,7 @@ Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series) /read - remote_readremote_read
db_name time_series_table
@@ -144,14 +144,14 @@ Multiple protocols can be specified together in one place: /write - remote_writeremote_write
db_name.time_series_table
/read - remote_readremote_read
db_name.time_series_table
From e384e2c38e405b1b4758adaa44cd321e6d7f41b3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 18:34:33 +0200 Subject: [PATCH 118/132] tests: fix 02122_join_group_by_timeout flakiness CI found [1] failure of the test: 2024-08-11 21:06:07 /usr/share/clickhouse-test/queries/0_stateless/02122_join_group_by_timeout.sh: line 51: 52614 Killed timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q "SELECT a.name as n And the problem is not the server, but the client, since query executed for ~1 second: 2024.08.11 21:06:02.284318 [ 49232 ] {ba989ee2-f615-49ca-bcd8-31b3916aeb2c} executeQuery: (from [::1]:54144) (comment: 02122_join_group_by_timeout.sh) SELECT a.name as n FROM ( SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 ) AS a, ( SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 ) as b FORMAT Null SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' (stage: Complete) 2024.08.11 21:06:03.331249 [ 49232 ] {ba989ee2-f615-49ca-bcd8-31b3916aeb2c} executeQuery: Read 517104 rows, 3.95 MiB in 1.072023 sec., 482362.78512681165 rows/sec., 3.68 MiB/sec. [1]: https://s3.amazonaws.com/clickhouse-test-reports/67134/18da3f0ab63da1eef9396627d0dfd56cf5356f65/stateless_tests__msan__[1_4].html So instead of using timeout, let's use time from the system.query_log instead. Signed-off-by: Azat Khuzhin --- .../02122_join_group_by_timeout.reference | 6 +- .../02122_join_group_by_timeout.sh | 70 ++++++++++--------- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference index f314e22e519..6500560e8fc 100644 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.reference +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.reference @@ -1,4 +1,6 @@ -Code: 159 -0 +Code: 159 +query_duration 1 +0 +query_duration 1 Code: 159 0 diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 8380c5dbd0c..b4644878544 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -1,27 +1,23 @@ #!/usr/bin/env bash -# Tags: no-debug - -# no-debug: Query is canceled by timeout after max_execution_time, -# but sending an exception to the client may hang -# for more than MAX_PROCESS_WAIT seconds in a slow debug build, -# and test will fail. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -MAX_PROCESS_WAIT=5 - -IS_SANITIZER=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%'") -if [ "$IS_SANITIZER" -gt 0 ]; then - # Query may hang for more than 5 seconds, especially in tsan build - MAX_PROCESS_WAIT=15 +TIMEOUT=5 +IS_SANITIZER_OR_DEBUG=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.warnings WHERE message like '%built with sanitizer%' or message like '%built in debug mode%'") +if [ "$IS_SANITIZER_OR_DEBUG" -gt 0 ]; then + # Increase the timeout due to in debug/sanitizers build: + # - client is slow + # - stacktrace resolving is slow + TIMEOUT=15 fi # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ - "SELECT * FROM +query_id=$(random_str 12) +$CLICKHOUSE_CLIENT --query_id "$query_id" --max_execution_time 1 -q " + SELECT * FROM ( SELECT a.name as n FROM @@ -34,28 +30,35 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -m1 -o "Code: 159" +$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" + ### Should stop pulling data and return what has been generated already (return code 0) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ - "SELECT a.name as n - FROM - ( - SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 - ) AS a, - ( - SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 - ) as b - FORMAT Null - SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +query_id=$(random_str 12) +$CLICKHOUSE_CLIENT --query_id "$query_id" -q " + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' +" echo $? +$CLICKHOUSE_CLIENT -q "system flush logs" +${CLICKHOUSE_CURL} -q -sS "$CLICKHOUSE_URL" -d "select 'query_duration', round(query_duration_ms/1000) from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and query_id = '$query_id' and type != 'QueryStart'" # HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ - "SELECT * FROM +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d " + SELECT * FROM ( SELECT a.name as n FROM @@ -68,12 +71,13 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + FORMAT Null +" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ - "SELECT a.name as n +${CLICKHOUSE_CURL} -q --max-time $TIMEOUT -sS "$CLICKHOUSE_URL" -d " + SELECT a.name as n FROM ( SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 @@ -83,5 +87,5 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ ) as b FORMAT Null SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' - " +" echo $? From 8a48b3334433fe5e77c23ff6df10e454db2d3f82 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 21:27:08 +0200 Subject: [PATCH 119/132] Fix settings/current_database in system.processes for async BACKUP/RESTORE Signed-off-by: Azat Khuzhin --- src/Backups/BackupsWorker.cpp | 4 ++++ src/Interpreters/ProcessList.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 0b93ae6d547..8b45c816817 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -490,6 +490,8 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); + /// Update context to preserve query information in processlist (settings, current_database) + process_list_element->updateContext(context_in_use); thread_pool.scheduleOrThrowOnError( [this, @@ -853,6 +855,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); + /// Update context to preserve query information in processlist (settings, current_database) + process_list_element->updateContext(context_in_use); thread_pool.scheduleOrThrowOnError( [this, diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index accb73e12df..248ba947bc1 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -244,6 +244,9 @@ public: /// Same as checkTimeLimit but it never throws [[nodiscard]] bool checkTimeLimitSoft(); + /// Use it in case of the query left in background to execute asynchronously + void updateContext(ContextWeakPtr weak_context) { context = std::move(weak_context); } + /// Get the reference for the start of the query. Used to synchronize with other Stopwatches UInt64 getQueryCPUStartTime() { return watch.getStart(); } }; From 5c8665c66069256f4e34fb32068fab2fcb90cc65 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Aug 2024 20:40:55 +0000 Subject: [PATCH 120/132] fix system.kafka_consumers and doc, fix tidy --- docs/en/operations/system-tables/kafka_consumers.md | 1 + src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp | 2 +- src/Storages/System/StorageSystemKafkaConsumers.cpp | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/system-tables/kafka_consumers.md b/docs/en/operations/system-tables/kafka_consumers.md index 7e28a251e26..d58c9f754fd 100644 --- a/docs/en/operations/system-tables/kafka_consumers.md +++ b/docs/en/operations/system-tables/kafka_consumers.md @@ -24,6 +24,7 @@ Columns: - `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions - `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster - `is_currently_used`, (UInt8) - consumer is in use +- `last_used`, (UInt64) - last time this consumer was in use, unix time in microseconds - `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 disable, default is 3000 (once in three seconds). Example: diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 3ca7e8183f1..86cec8659f5 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -129,7 +129,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d if (shardContains(child, name, data)) new_tuple.emplace_back(std::move(child)); - if (new_tuple.size() == 0) + if (new_tuple.empty()) new_tuple.emplace_back(std::move(tuple.back())); tuple_literal->value = std::move(new_tuple); diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index 5e790587716..db6804d3ad7 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -42,7 +42,7 @@ ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, {"is_currently_used", std::make_shared(), "The flag which shows whether the consumer is in use."}, - {"last_used", std::make_shared(6), "The last time this consumer was in use."}, + {"last_used", std::make_shared(), "The last time this consumer was in use, unix time in microseconds."}, {"rdkafka_stat", std::make_shared(), "Library internal statistic. Set statistics_interval_ms to 0 disable, default is 3000 (once in three seconds)."}, }; } From 5acf9f6f8160ee6de2845b13bed07d63832ca3fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 07:01:52 +0200 Subject: [PATCH 121/132] Fix `test_cluster_all_replicas` --- tests/integration/test_cluster_all_replicas/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index d8bad180e1b..9797db7c498 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -21,14 +21,14 @@ def start_cluster(): def test_cluster(start_cluster): assert ( node1.query( - "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one)" + "SELECT hostName() FROM clusterAllReplicas('one_shard_two_nodes', system.one) ORDER BY ALL" ) == "node1\nnode2\n" ) assert set( node1.query( - """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) ORDER BY dummy""" + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) ORDER BY ALL""" ).splitlines() ) == {"node1\t0", "node2\t0"} @@ -48,7 +48,7 @@ def test_global_in(start_cluster): assert set( node1.query( - """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u""" + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u ORDER BY ALL""" ).splitlines() ) == {"node1\t0", "node2\t0"} @@ -63,7 +63,7 @@ def test_global_in(start_cluster): def test_skip_unavailable_replica(start_cluster, cluster): assert ( node1.query( - f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=1" + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) ORDER BY ALL settings skip_unavailable_shards=1" ) == "node1\nnode2\n" ) @@ -81,5 +81,5 @@ def test_error_on_unavailable_replica(start_cluster, cluster): # so when skip_unavailable_shards=0 - any unavailable replica should lead to an error with pytest.raises(QueryRuntimeException): node1.query( - f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) settings skip_unavailable_shards=0" + f"SELECT hostName() FROM clusterAllReplicas('{cluster}', system.one) ORDER BY ALL settings skip_unavailable_shards=0" ) From b3504def35ba6baf7a7cf1f9b84a72e8f70f95f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 07:15:08 +0200 Subject: [PATCH 122/132] Fix leftovers --- tests/config/config.d/transactions.xml | 4 ++-- .../test_distributed_type_object/configs/remote_servers.xml | 2 +- .../configs/host_regexp.xml | 4 ++-- .../configs/listen_host.xml | 2 +- .../test_jbod_ha/configs/config.d/storage_configuration.xml | 2 +- .../integration/test_server_reload/configs/default_passwd.xml | 2 +- .../test_server_reload/configs/overrides_from_zk.xml | 2 +- tests/integration/test_version_update/configs/log_conf.xml | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/config/config.d/transactions.xml b/tests/config/config.d/transactions.xml index 9948b1f1865..64e166b81b5 100644 --- a/tests/config/config.d/transactions.xml +++ b/tests/config/config.d/transactions.xml @@ -1,4 +1,4 @@ - + 42 @@ -18,4 +18,4 @@ 0.01 - + diff --git a/tests/integration/test_distributed_type_object/configs/remote_servers.xml b/tests/integration/test_distributed_type_object/configs/remote_servers.xml index ebce4697529..0ea61f0d5fc 100644 --- a/tests/integration/test_distributed_type_object/configs/remote_servers.xml +++ b/tests/integration/test_distributed_type_object/configs/remote_servers.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml index 7a2141e6c7e..0bf7fad9a70 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml index 58ef55cd3f3..4f0841ab8b6 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml @@ -1,4 +1,4 @@ - + :: 0.0.0.0 1 diff --git a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml index b5c351d105b..b2c4645644a 100644 --- a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml @@ -1,4 +1,4 @@ - + 1000 diff --git a/tests/integration/test_server_reload/configs/default_passwd.xml b/tests/integration/test_server_reload/configs/default_passwd.xml index f79149e7e23..45ae005bd19 100644 --- a/tests/integration/test_server_reload/configs/default_passwd.xml +++ b/tests/integration/test_server_reload/configs/default_passwd.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/integration/test_server_reload/configs/overrides_from_zk.xml b/tests/integration/test_server_reload/configs/overrides_from_zk.xml index d420faa88a2..b17c5c9fa99 100644 --- a/tests/integration/test_server_reload/configs/overrides_from_zk.xml +++ b/tests/integration/test_server_reload/configs/overrides_from_zk.xml @@ -1,4 +1,4 @@ - + diff --git a/tests/integration/test_version_update/configs/log_conf.xml b/tests/integration/test_version_update/configs/log_conf.xml index f9d15e572aa..17215c1759d 100644 --- a/tests/integration/test_version_update/configs/log_conf.xml +++ b/tests/integration/test_version_update/configs/log_conf.xml @@ -1,4 +1,4 @@ - + trace /var/log/clickhouse-server/log.log From b92a8f0fbcec640fbac35fca1b3fb23914d44990 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 07:16:25 +0200 Subject: [PATCH 123/132] Fix leftovers --- .../external-authenticators/kerberos.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/zh/operations/external-authenticators/kerberos.md b/docs/zh/operations/external-authenticators/kerberos.md index 649a0b9bd48..d1a39bbc952 100644 --- a/docs/zh/operations/external-authenticators/kerberos.md +++ b/docs/zh/operations/external-authenticators/kerberos.md @@ -23,30 +23,30 @@ slug: /zh/operations/external-authenticators/kerberos 示例 (进入 `config.xml`): ```xml - + - + ``` 主体规范: ```xml - + HTTP/clickhouse.example.com@EXAMPLE.COM - + ``` 按领域过滤: ```xml - + EXAMPLE.COM - + ``` !!! warning "注意" @@ -74,7 +74,7 @@ Kerberos主体名称格式通常遵循以下模式: 示例 (进入 `users.xml`): ``` - + @@ -85,7 +85,7 @@ Kerberos主体名称格式通常遵循以下模式: - + ``` !!! warning "警告" From 6f189e9eb7ded15df4ddbe7f90dafb28feaab2e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 07:17:17 +0200 Subject: [PATCH 124/132] Fix leftovers --- .../test_distributed_type_object/configs/remote_servers.xml | 2 +- .../configs/host_regexp.xml | 2 +- .../configs/listen_host.xml | 2 +- .../test_jbod_ha/configs/config.d/storage_configuration.xml | 2 +- tests/integration/test_server_reload/configs/default_passwd.xml | 2 +- .../test_server_reload/configs/overrides_from_zk.xml | 2 +- tests/integration/test_version_update/configs/log_conf.xml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_distributed_type_object/configs/remote_servers.xml b/tests/integration/test_distributed_type_object/configs/remote_servers.xml index 0ea61f0d5fc..68b420f36b4 100644 --- a/tests/integration/test_distributed_type_object/configs/remote_servers.xml +++ b/tests/integration/test_distributed_type_object/configs/remote_servers.xml @@ -15,4 +15,4 @@ - + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml index 0bf7fad9a70..9329c8dbde2 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/host_regexp.xml @@ -8,4 +8,4 @@ default - + diff --git a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml index 4f0841ab8b6..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_hosts_file_resolution/configs/listen_host.xml @@ -2,4 +2,4 @@ :: 0.0.0.0 1 - + diff --git a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml index b2c4645644a..fb9acc58ad6 100644 --- a/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_jbod_ha/configs/config.d/storage_configuration.xml @@ -27,4 +27,4 @@ - + diff --git a/tests/integration/test_server_reload/configs/default_passwd.xml b/tests/integration/test_server_reload/configs/default_passwd.xml index 45ae005bd19..9d664cbf9c4 100644 --- a/tests/integration/test_server_reload/configs/default_passwd.xml +++ b/tests/integration/test_server_reload/configs/default_passwd.xml @@ -9,4 +9,4 @@ 123 - + diff --git a/tests/integration/test_server_reload/configs/overrides_from_zk.xml b/tests/integration/test_server_reload/configs/overrides_from_zk.xml index b17c5c9fa99..aa6105f6ebe 100644 --- a/tests/integration/test_server_reload/configs/overrides_from_zk.xml +++ b/tests/integration/test_server_reload/configs/overrides_from_zk.xml @@ -7,4 +7,4 @@ - + diff --git a/tests/integration/test_version_update/configs/log_conf.xml b/tests/integration/test_version_update/configs/log_conf.xml index 17215c1759d..27c7107ce5e 100644 --- a/tests/integration/test_version_update/configs/log_conf.xml +++ b/tests/integration/test_version_update/configs/log_conf.xml @@ -8,4 +8,4 @@ /var/log/clickhouse-server/stderr.log /var/log/clickhouse-server/stdout.log - + From 6016dc96aae57f38ef3ace1ed687b82bcc437425 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 07:19:54 +0200 Subject: [PATCH 125/132] Fix test `01172_transaction_counters` --- programs/server/config.d/transactions.xml | 1 + .../01172_transaction_counters.reference | 44 +++++++++---------- .../01172_transaction_counters.sql | 1 - 3 files changed, 23 insertions(+), 23 deletions(-) create mode 120000 programs/server/config.d/transactions.xml diff --git a/programs/server/config.d/transactions.xml b/programs/server/config.d/transactions.xml new file mode 120000 index 00000000000..be9de46b607 --- /dev/null +++ b/programs/server/config.d/transactions.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/transactions.xml \ No newline at end of file diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index 24083d7d40b..0fd73c7bcec 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -16,25 +16,25 @@ 7 all_3_3_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 7 all_4_4_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 8 1 -1 1 AddPart 1 1 1 1 all_1_1_0 -2 1 Begin 1 1 1 1 -2 1 AddPart 1 1 1 1 all_2_2_0 -2 1 Rollback 1 1 1 1 -3 1 Begin 1 1 1 1 -3 1 AddPart 1 1 1 1 all_3_3_0 -3 1 Commit 1 1 1 0 -1 1 LockPart 1 1 1 1 all_2_2_0 -4 1 Begin 1 1 1 1 -4 1 AddPart 1 1 1 1 all_4_4_0 -4 1 Commit 1 1 1 0 -5 1 Begin 1 1 1 1 -5 1 AddPart 1 1 1 1 all_5_5_0 -5 1 LockPart 1 1 1 1 all_1_1_0 -5 1 LockPart 1 1 1 1 all_3_3_0 -5 1 LockPart 1 1 1 1 all_4_4_0 -5 1 LockPart 1 1 1 1 all_5_5_0 -5 1 UnlockPart 1 1 1 1 all_1_1_0 -5 1 UnlockPart 1 1 1 1 all_3_3_0 -5 1 UnlockPart 1 1 1 1 all_4_4_0 -5 1 UnlockPart 1 1 1 1 all_5_5_0 -5 1 Rollback 1 1 1 1 +1 AddPart 1 1 1 1 all_1_1_0 +2 Begin 1 1 1 1 +2 AddPart 1 1 1 1 all_2_2_0 +2 Rollback 1 1 1 1 +3 Begin 1 1 1 1 +3 AddPart 1 1 1 1 all_3_3_0 +3 Commit 1 1 1 0 +1 LockPart 1 1 1 1 all_2_2_0 +4 Begin 1 1 1 1 +4 AddPart 1 1 1 1 all_4_4_0 +4 Commit 1 1 1 0 +5 Begin 1 1 1 1 +5 AddPart 1 1 1 1 all_5_5_0 +5 LockPart 1 1 1 1 all_1_1_0 +5 LockPart 1 1 1 1 all_3_3_0 +5 LockPart 1 1 1 1 all_4_4_0 +5 LockPart 1 1 1 1 all_5_5_0 +5 UnlockPart 1 1 1 1 all_1_1_0 +5 UnlockPart 1 1 1 1 all_3_3_0 +5 UnlockPart 1 1 1 1 all_4_4_0 +5 UnlockPart 1 1 1 1 all_5_5_0 +5 Rollback 1 1 1 1 diff --git a/tests/queries/0_stateless/01172_transaction_counters.sql b/tests/queries/0_stateless/01172_transaction_counters.sql index a809e4196e9..581b45cd15c 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.sql +++ b/tests/queries/0_stateless/01172_transaction_counters.sql @@ -42,7 +42,6 @@ rollback; system flush logs; select indexOf((select arraySort(groupUniqArray(tid)) from system.transactions_info_log where database=currentDatabase() and table='txn_counters'), tid), - (toDecimal64(now64(6), 6) - toDecimal64(event_time, 6)) < 100, type, thread_id!=0, length(query_id)=length(queryID()) or type='Commit' and query_id='', -- ignore fault injection after commit From 574c445be9368ee481e8d213251106233d417f69 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 7 Aug 2024 16:29:07 +0000 Subject: [PATCH 126/132] Refactor tests for (experimental) statistics --- docs/en/development/tests.md | 4 +- .../statements/alter/statistics.md | 16 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- ...2864_statistics_count_min_sketch.reference | 14 -- .../02864_statistics_count_min_sketch.sql | 70 ------ .../02864_statistics_ddl.reference | 37 +-- .../0_stateless/02864_statistics_ddl.sql | 234 ++++++++++++++---- ...delayed_materialization_in_merge.reference | 12 + ...stics_delayed_materialization_in_merge.sql | 36 +++ .../02864_statistics_exception.reference | 0 .../02864_statistics_exception.sql | 55 ---- ..._statistics_materialize_in_merge.reference | 10 - .../02864_statistics_materialize_in_merge.sql | 52 ---- .../02864_statistics_predicates.reference | 98 ++++++++ .../02864_statistics_predicates.sql | 214 ++++++++++++++++ .../02864_statistics_uniq.reference | 35 --- .../0_stateless/02864_statistics_uniq.sql | 73 ------ .../02864_statistics_usage.reference | 20 ++ .../0_stateless/02864_statistics_usage.sql | 42 ++++ 19 files changed, 625 insertions(+), 399 deletions(-) delete mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.sql create mode 100644 tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference create mode 100644 tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_exception.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_exception.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql create mode 100644 tests/queries/0_stateless/02864_statistics_predicates.reference create mode 100644 tests/queries/0_stateless/02864_statistics_predicates.sql delete mode 100644 tests/queries/0_stateless/02864_statistics_uniq.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_uniq.sql create mode 100644 tests/queries/0_stateless/02864_statistics_usage.reference create mode 100644 tests/queries/0_stateless/02864_statistics_usage.sql diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 269995a1a96..6cb36e2049b 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public. -Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. +Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. :::note A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs @@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes ### Adding a New Test -To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. diff --git a/docs/en/sql-reference/statements/alter/statistics.md b/docs/en/sql-reference/statements/alter/statistics.md index 6880cef0e5c..7a1774a01b5 100644 --- a/docs/en/sql-reference/statements/alter/statistics.md +++ b/docs/en/sql-reference/statements/alter/statistics.md @@ -8,26 +8,28 @@ sidebar_label: STATISTICS The following operations are available: -- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata. +- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds statistic description to tables metadata. -- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata. +- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies statistic description to tables metadata. -- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. +- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns. -- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. +- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`. -- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). +- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations). The first two commands are lightweight in a sense that they only change metadata or remove files. Also, they are replicated, syncing statistics metadata via ZooKeeper. -There is an example adding two statistics types to two columns: +## Example: + +Adding two statistics types to two columns: ``` ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq; ``` :::note -Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). +Statistic are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants). ::: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 625b1281c61..c7101021f02 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3517,7 +3517,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context const auto & new_column = new_metadata.getColumns().get(command.column_name); if (!old_column.type->equals(*new_column.type)) throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, - "ALTER types of column {} with statistics is not not safe " + "ALTER types of column {} with statistics is not safe " "because it can change the representation of statistics", backQuoteIfNeed(command.column_name)); } diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference deleted file mode 100644 index 02c41656a36..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics count_min: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) -Test statistics multi-types: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test LowCardinality and Nullable data type: -tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql deleted file mode 100644 index c730aa7b4a7..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql +++ /dev/null @@ -1,70 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS tab SYNC; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET allow_suspicious_low_cardinality_types=1; -SET mutations_sync = 2; - -CREATE TABLE tab -( - a String, - b UInt64, - c Int64, - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE tab; - -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'Test statistics count_min:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min; -ALTER TABLE tab ADD STATISTICS c TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - - -SELECT 'Test statistics multi-types:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab DROP STATISTICS a, b, c; - -DROP TABLE IF EXISTS tab SYNC; - - -SELECT 'Test LowCardinality and Nullable data type:'; -DROP TABLE IF EXISTS tab2 SYNC; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE tab2 -( - a LowCardinality(Int64) STATISTICS(count_min), - b Nullable(Int64) STATISTICS(count_min), - c LowCardinality(Nullable(Int64)) STATISTICS(count_min), - pk String, -) Engine = MergeTree() ORDER BY pk; - -select name from system.tables where name = 'tab2' and database = currentDatabase(); - -DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_ddl.reference b/tests/queries/0_stateless/02864_statistics_ddl.reference index a7ff5caa0b0..0e453b0ee8a 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.reference +++ b/tests/queries/0_stateless/02864_statistics_ddl.reference @@ -1,31 +1,6 @@ -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -10 -0 -After drop statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(a, 10)) (removed) -10 -CREATE TABLE default.tab\n(\n `a` Float64,\n `b` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After add statistic -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After materialize statistic - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(b, 10)) (removed) -20 -CREATE TABLE default.tab\n(\n `a` Float64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After rename - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, 10)) (removed) -20 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64 STATISTICS(tdigest, uniq),\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32 STATISTICS(tdigest, uniq),\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `f64` Float64,\n `f64_tdigest` Float64 STATISTICS(tdigest),\n `f32` Float32,\n `s` String,\n `a` Array(Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index fe612efe2ac..32b56a842b7 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -1,59 +1,195 @@ --- Tests that various DDL statements create/drop/materialize statistics +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests that DDL statements which create / drop / materialize statistics + +SET mutations_sync = 1; DROP TABLE IF EXISTS tab; +-- Error case: Can't create statistics when allow_experimental_statistics = 0 +CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; + +-- Error case: Unknown statistics types are rejected +CREATE TABLE tab (col Float64 STATISTICS(no_statistics_type)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +-- Error case: The same statistics type can't exist more than once on a column +CREATE TABLE tab (col Float64 STATISTICS(tdigest, tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } + +SET allow_suspicious_low_cardinality_types = 1; + +-- Statistics can only be created on columns of specific data types (depending on the statistics kind), (*) + +-- tdigest requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- uniq requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- count_min requires data_type.isValueRepresentedByInteger or data_type = (Fixed)String +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col String STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col FixedString(1) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col Array(Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + +-- CREATE TABLE was easy, ALTER is more fun CREATE TABLE tab ( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f32 Float32, + s String, + a Array(Float64) +) +Engine = MergeTree() +ORDER BY tuple(); +-- Error case: Unknown statistics types are rejected +-- (relevant for ADD and MODIFY) +ALTER TABLE tab ADD STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } +-- for some reason, ALTER TABLE tab MODIFY STATISTICS IF EXISTS is not supported + +-- Error case: The same statistics type can't exist more than once on a column +-- (relevant for ADD and MODIFY) +-- Create the same statistics object twice +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest, tdigest; -- { serverError INCORRECT_QUERY } +-- Create an statistics which exists already +ALTER TABLE tab ADD STATISTICS f64_tdigest TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS f64_tdigest TYPE tdigest; -- no-op +ALTER TABLE tab MODIFY STATISTICS f64_tdigest TYPE tdigest; -- no-op + +-- Error case: Column does not exist +-- (relevant for ADD, MODIFY, DROP, CLEAR, and MATERIALIZE) +-- Note that the results are unfortunately quite inconsistent ... +ALTER TABLE tab ADD STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab ADD STATISTICS IF NOT EXISTS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS no_such_column TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS IF EXISTS no_such_column; -- no-op +ALTER TABLE tab CLEAR STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS no_such_column; -- no-op +ALTER TABLE tab MATERIALIZE STATISTICS no_such_column; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS no_such_column; -- { serverError ILLEGAL_STATISTICS } + +-- Error case: Column exists but has no statistics +-- (relevant for MODIFY, DROP, CLEAR, and MATERIALIZE) +-- Note that the results are unfortunately quite inconsistent ... +ALTER TABLE tab MODIFY STATISTICS s TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab DROP STATISTICS IF EXISTS s; -- no-op +ALTER TABLE tab CLEAR STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab CLEAR STATISTICS IF EXISTS s; -- no-op +ALTER TABLE tab MATERIALIZE STATISTICS s; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MATERIALIZE STATISTICS IF EXISTS s; -- { serverError ILLEGAL_STATISTICS } + +-- We don't check systematically that that statistics can only be created via ALTER ADD STATISTICS on columns of specific data types (the +-- internal type validation code is tested already above, (*)). Only do a rudimentary check for each statistics type with a data type that +-- works and one that doesn't work. +-- tdigest +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE tdigest; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } +-- uniq +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE uniq; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE uniq; -- { serverError ILLEGAL_STATISTICS } +-- count_min +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } + +-- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing +-- statistics objects (e.g. tdigest can be created on Float64 and UInt64) +ALTER TABLE tab MODIFY COLUMN f64_tdigest UInt64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } + +-- Finally, do a full-circle test of a good case. Print table definition after each step. +-- Intentionally specifying _two_ columns and _two_ statistics types to have that also tested. +SHOW CREATE TABLE tab; +ALTER TABLE tab ADD STATISTICS f64, f32 TYPE tdigest, uniq; +SHOW CREATE TABLE tab; +ALTER TABLE tab MODIFY STATISTICS f64, f32 TYPE tdigest, uniq; +SHOW CREATE TABLE tab; +ALTER TABLE tab CLEAR STATISTICS f64, f32; +SHOW CREATE TABLE tab; +ALTER TABLE tab MATERIALIZE STATISTICS f64, f32; +SHOW CREATE TABLE tab; +ALTER TABLE tab DROP STATISTICS f64, f32; SHOW CREATE TABLE tab; -INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; -SELECT count(*) FROM tab WHERE b < NULL and a < '10'; - -ALTER TABLE tab DROP STATISTICS a, b; - -SELECT 'After drop statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -SHOW CREATE TABLE tab; - -ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; - -SELECT 'After add statistic'; - -SHOW CREATE TABLE tab; - -ALTER TABLE tab MATERIALIZE STATISTICS a, b; -INSERT INTO tab select number, -number, generateUUIDv4() FROM system.numbers LIMIT 10000; - -SELECT 'After materialize statistic'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -OPTIMIZE TABLE tab FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE b < 10 and a < 10; - -ALTER TABLE tab RENAME COLUMN b TO c; -SHOW CREATE TABLE tab; - -SELECT 'After rename'; -SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT count(*) FROM tab WHERE c < 10 and a < 10; - -DROP TABLE IF EXISTS tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference new file mode 100644 index 00000000000..eb5e685597c --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference @@ -0,0 +1,12 @@ +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After truncate, insert, and materialize + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql new file mode 100644 index 00000000000..d469a4c2036 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.sql @@ -0,0 +1,36 @@ +-- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). +-- (The concrete statistics type, column data type and predicate type don't matter) + +-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) + +DROP TABLE IF EXISTS tab; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET enable_analyzer = 1; + +SET materialize_statistics_on_insert = 0; + +CREATE TABLE tab +( + a Int64 STATISTICS(tdigest), + b Int16 STATISTICS(tdigest), +) ENGINE = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. + +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +TRUNCATE TABLE tab; +SET mutations_sync = 2; +INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +SELECT 'After truncate, insert, and materialize'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_exception.reference b/tests/queries/0_stateless/02864_statistics_exception.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02864_statistics_exception.sql b/tests/queries/0_stateless/02864_statistics_exception.sql deleted file mode 100644 index 289ffee6600..00000000000 --- a/tests/queries/0_stateless/02864_statistics_exception.sql +++ /dev/null @@ -1,55 +0,0 @@ --- Tests creating/dropping/materializing statistics produces the right exceptions. - -DROP TABLE IF EXISTS tab; - --- Can't create statistics when allow_experimental_statistics = 0 -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - -SET allow_experimental_statistics = 1; - --- The same type of statistics can't exist more than once on a column -CREATE TABLE tab -( - a Float64 STATISTICS(tdigest, tdigest) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- Unknown statistics types are rejected -CREATE TABLE tab -( - a Float64 STATISTICS(no_statistics_type) -) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } - --- tDigest statistics can only be created on numeric columns -CREATE TABLE tab -( - a String STATISTICS(tdigest), -) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } - -CREATE TABLE tab -( - a Float64, - b String -) Engine = MergeTree() ORDER BY tuple(); - -ALTER TABLE tab ADD STATISTICS a TYPE no_statistics_type; -- { serverError INCORRECT_QUERY } -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS IF NOT EXISTS a TYPE tdigest; -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab MODIFY STATISTICS a TYPE tdigest; --- Statistics can be created only on integer columns -ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS b; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab DROP STATISTICS a; -ALTER TABLE tab DROP STATISTICS IF EXISTS a; -ALTER TABLE tab CLEAR STATISTICS a; -- { serverError ILLEGAL_STATISTICS } -ALTER TABLE tab CLEAR STATISTICS IF EXISTS a; -ALTER TABLE tab MATERIALIZE STATISTICS b; -- { serverError ILLEGAL_STATISTICS } - -ALTER TABLE tab ADD STATISTICS a TYPE tdigest; -ALTER TABLE tab MODIFY COLUMN a Float64 TTL toDateTime(b) + INTERVAL 1 MONTH; -ALTER TABLE tab MODIFY COLUMN a Int64; -- { serverError ALTER_OF_COLUMN_IS_FORBIDDEN } - -DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference deleted file mode 100644 index 5e969cf41cb..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.reference +++ /dev/null @@ -1,10 +0,0 @@ -10 -10 -10 -statistics not used Condition less(b, 10_UInt8) moved to PREWHERE -statistics not used Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after merge Condition less(b, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(a, 10_UInt8) moved to PREWHERE -statistics used after materialize Condition less(b, 10_UInt8) moved to PREWHERE -2 0 diff --git a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql b/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql deleted file mode 100644 index 6606cff263f..00000000000 --- a/tests/queries/0_stateless/02864_statistics_materialize_in_merge.sql +++ /dev/null @@ -1,52 +0,0 @@ --- Tests delayed materialization of statistics in merge instead of during insert (setting 'materialize_statistics_on_insert = 0'). - -DROP TABLE IF EXISTS tab; - -SET enable_analyzer = 1; -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; - -SET materialize_statistics_on_insert = 0; - -CREATE TABLE tab -( - a Int64 STATISTICS(tdigest), - b Int16 STATISTICS(tdigest), -) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; -- TODO: there is a bug in vertical merge with statistics. - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics not used'; - -OPTIMIZE TABLE tab FINAL; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after merge'; - -TRUNCATE TABLE tab; -SET mutations_sync = 2; - -INSERT INTO tab SELECT number, -number FROM system.numbers LIMIT 10000; -ALTER TABLE tab MATERIALIZE STATISTICS a, b; - -SELECT count(*) FROM tab WHERE b < 10 and a < 10 SETTINGS log_comment = 'statistics used after materialize'; - -DROP TABLE tab; - -SYSTEM FLUSH LOGS; - -SELECT log_comment, message FROM system.text_log JOIN -( - SELECT Settings['log_comment'] AS log_comment, query_id FROM system.query_log - WHERE current_database = currentDatabase() - AND query LIKE 'SELECT count(*) FROM tab%' - AND type = 'QueryFinish' -) AS query_log USING (query_id) -WHERE message LIKE '%moved to PREWHERE%' -ORDER BY event_time_microseconds; - -SELECT count(), sum(ProfileEvents['MergeTreeDataWriterStatisticsCalculationMicroseconds']) -FROM system.query_log -WHERE current_database = currentDatabase() - AND query LIKE 'INSERT INTO tab SELECT%' - AND type = 'QueryFinish'; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference new file mode 100644 index 00000000000..ffbd7269e05 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.reference @@ -0,0 +1,98 @@ +u64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +u64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +70 +f64 and = +10 +10 +10 +10 +0 +0 +0 +0 +10 +10 +10 +10 +0 +0 +0 +0 +f64 and < +70 +70 +70 +70 +80 +80 +80 +80 +70 +70 +70 +70 +80 +80 +80 +80 +dt and = +0 +0 +0 +0 +10 +10 +10 +10 +dt and < +10000 +10000 +10000 +10000 +70 +70 +70 +70 +b and = +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +5000 +0 +0 +0 +0 +s and = +10 +10 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql new file mode 100644 index 00000000000..779116cf19a --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -0,0 +1,214 @@ +-- Tags: no-fasttest +-- no-fasttest: 'count_min' sketches need a 3rd party library + +-- Tests the cross product of all predicates with all right-hand sides on all data types and all statistics types. + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + u64 UInt64, + u64_tdigest UInt64 STATISTICS(tdigest), + u64_count_min UInt64 STATISTICS(count_min), + u64_uniq UInt64 STATISTICS(uniq), + f64 Float64, + f64_tdigest Float64 STATISTICS(tdigest), + f64_count_min Float64 STATISTICS(count_min), + f64_uniq Float64 STATISTICS(uniq), + dt DateTime, + dt_tdigest DateTime STATISTICS(tdigest), + dt_count_min DateTime STATISTICS(count_min), + dt_uniq DateTime STATISTICS(uniq), + b Bool, + b_tdigest Bool STATISTICS(tdigest), + b_count_min Bool STATISTICS(count_min), + b_uniq Bool STATISTICS(uniq), + s String, + -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest + s_count_min String STATISTICS(count_min) + -- s_uniq String STATISTICS(uniq), -- not supported by uniq +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab +-- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000; +SELECT number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 1000, + number % 2, + number % 2, + number % 2, + number % 2, + toString(number % 1000), + toString(number % 1000) +FROM system.numbers LIMIT 10000; + +-- u64 ---------------------------------------------------- + +SELECT 'u64 and ='; + +SELECT count(*) FROM tab WHERE u64 = 7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7; +SELECT count(*) FROM tab WHERE u64_count_min = 7; +SELECT count(*) FROM tab WHERE u64_uniq = 7; + +SELECT count(*) FROM tab WHERE u64 = 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE u64_count_min = 7.7; +SELECT count(*) FROM tab WHERE u64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE u64 = '7'; +SELECT count(*) FROM tab WHERE u64_tdigest = '7'; +SELECT count(*) FROM tab WHERE u64_count_min = '7'; +SELECT count(*) FROM tab WHERE u64_uniq = '7'; + +SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } + +SELECT 'u64 and <'; + +SELECT count(*) FROM tab WHERE u64 < 7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7; +SELECT count(*) FROM tab WHERE u64_count_min < 7; +SELECT count(*) FROM tab WHERE u64_uniq < 7; + +SELECT count(*) FROM tab WHERE u64 < 7.7; +SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE u64_count_min < 7.7; +SELECT count(*) FROM tab WHERE u64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE u64 < '7'; +SELECT count(*) FROM tab WHERE u64_tdigest < '7'; +SELECT count(*) FROM tab WHERE u64_count_min < '7'; +SELECT count(*) FROM tab WHERE u64_uniq < '7'; + +SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } + +-- f64 ---------------------------------------------------- + +SELECT 'f64 and ='; + +SELECT count(*) FROM tab WHERE f64 = 7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7; +SELECT count(*) FROM tab WHERE f64_count_min = 7; +SELECT count(*) FROM tab WHERE f64_uniq = 7; + +SELECT count(*) FROM tab WHERE f64 = 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE f64_count_min = 7.7; +SELECT count(*) FROM tab WHERE f64_uniq = 7.7; + +SELECT count(*) FROM tab WHERE f64 = '7'; +SELECT count(*) FROM tab WHERE f64_tdigest = '7'; +SELECT count(*) FROM tab WHERE f64_count_min = '7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7'; + +SELECT count(*) FROM tab WHERE f64 = '7.7'; +SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; +SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; + +SELECT 'f64 and <'; + +SELECT count(*) FROM tab WHERE f64 < 7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7; +SELECT count(*) FROM tab WHERE f64_count_min < 7; +SELECT count(*) FROM tab WHERE f64_uniq < 7; + +SELECT count(*) FROM tab WHERE f64 < 7.7; +SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE f64_count_min < 7.7; +SELECT count(*) FROM tab WHERE f64_uniq < 7.7; + +SELECT count(*) FROM tab WHERE f64 < '7'; +SELECT count(*) FROM tab WHERE f64_tdigest < '7'; +SELECT count(*) FROM tab WHERE f64_count_min < '7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7'; + +SELECT count(*) FROM tab WHERE f64 < '7.7'; +SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; +SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; +SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; + +-- dt ---------------------------------------------------- + +SELECT 'dt and ='; + +SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt = 7; +SELECT count(*) FROM tab WHERE dt_tdigest = 7; +SELECT count(*) FROM tab WHERE dt_count_min = 7; +SELECT count(*) FROM tab WHERE dt_uniq = 7; + +SELECT 'dt and <'; + +SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; + +SELECT count(*) FROM tab WHERE dt < 7; +SELECT count(*) FROM tab WHERE dt_tdigest < 7; +SELECT count(*) FROM tab WHERE dt_count_min < 7; +SELECT count(*) FROM tab WHERE dt_uniq < 7; + +-- b ---------------------------------------------------- + +SELECT 'b and ='; + +SELECT count(*) FROM tab WHERE b = true; +SELECT count(*) FROM tab WHERE b_tdigest = true; +SELECT count(*) FROM tab WHERE b_count_min = true; +SELECT count(*) FROM tab WHERE b_uniq = true; + +SELECT count(*) FROM tab WHERE b = 'true'; +SELECT count(*) FROM tab WHERE b_tdigest = 'true'; +SELECT count(*) FROM tab WHERE b_count_min = 'true'; +SELECT count(*) FROM tab WHERE b_uniq = 'true'; + +SELECT count(*) FROM tab WHERE b = 1; +SELECT count(*) FROM tab WHERE b_tdigest = 1; +SELECT count(*) FROM tab WHERE b_count_min = 1; +SELECT count(*) FROM tab WHERE b_uniq = 1; + +SELECT count(*) FROM tab WHERE b = 1.1; +SELECT count(*) FROM tab WHERE b_tdigest = 1.1; +SELECT count(*) FROM tab WHERE b_count_min = 1.1; +SELECT count(*) FROM tab WHERE b_uniq = 1.1; + +-- s ---------------------------------------------------- + +SELECT 's and ='; + +SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } +-- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported + +SELECT count(*) FROM tab WHERE s = '7'; +-- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported +SELECT count(*) FROM tab WHERE s_count_min = '7'; +-- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02864_statistics_uniq.reference b/tests/queries/0_stateless/02864_statistics_uniq.reference deleted file mode 100644 index 77786dbdd8c..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.reference +++ /dev/null @@ -1,35 +0,0 @@ -CREATE TABLE default.t1\n(\n `a` Float64 STATISTICS(tdigest),\n `b` Int64 STATISTICS(tdigest),\n `c` Int64 STATISTICS(tdigest, uniq),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -After insert - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After merge - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(c, 11), less(a, 10), less(b, 10)) (removed) -After modify TDigest - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(c, -1), less(a, 10), less(b, 10)) (removed) -After drop - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 11), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), equals(c, 0), less(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(a, 10), less(c, -1), less(b, 10)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_uniq.sql b/tests/queries/0_stateless/02864_statistics_uniq.sql deleted file mode 100644 index 0f5f353c045..00000000000 --- a/tests/queries/0_stateless/02864_statistics_uniq.sql +++ /dev/null @@ -1,73 +0,0 @@ -DROP TABLE IF EXISTS t1; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET mutations_sync = 1; - -CREATE TABLE t1 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Int64 STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE t1; - -INSERT INTO t1 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; -INSERT INTO t1 select 0, 0, 11, generateUUIDv4(); - -SELECT 'After insert'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -OPTIMIZE TABLE t1 FINAL; - -SELECT 'After merge'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -SELECT 'After modify TDigest'; -ALTER TABLE t1 MODIFY STATISTICS c TYPE TDigest; -ALTER TABLE t1 MATERIALIZE STATISTICS c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - - -ALTER TABLE t1 DROP STATISTICS c; - -SELECT 'After drop'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 11 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c = 0 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM t1 WHERE b < 10 and c < -1 and a < 10) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE t2 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c LowCardinality(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t2 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t2; -DROP TABLE IF EXISTS t3; - -CREATE TABLE t3 -( - a Float64 STATISTICS(tdigest), - b Int64 STATISTICS(tdigest), - c Nullable(Int64) STATISTICS(tdigest, uniq), - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO t3 select number, -number, number/1000, generateUUIDv4() FROM system.numbers LIMIT 10000; - -DROP TABLE IF EXISTS t3; - diff --git a/tests/queries/0_stateless/02864_statistics_usage.reference b/tests/queries/0_stateless/02864_statistics_usage.reference new file mode 100644 index 00000000000..a9f669b88c1 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.reference @@ -0,0 +1,20 @@ +After insert + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After drop statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed) +After add and materialize statistic + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After merge + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed) +After rename + Prewhere info + Prewhere filter + Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_usage.sql b/tests/queries/0_stateless/02864_statistics_usage.sql new file mode 100644 index 00000000000..4956bd27e87 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_usage.sql @@ -0,0 +1,42 @@ +-- Test that the optimizer picks up column statistics +-- (The concrete statistics type, column data type and predicate type don't matter) + +-- Checks by the predicate evaluation order in EXPLAIN. This is quite fragile, a better approach would be helpful (maybe 'send_logs_level'?) + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET mutations_sync = 1; +SET enable_analyzer = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + a Float64 STATISTICS(tdigest), + b Int64 STATISTICS(tdigest) +) Engine = MergeTree() ORDER BY tuple() +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; +SELECT 'After insert'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +ALTER TABLE tab DROP STATISTICS a, b; +SELECT 'After drop statistic'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks b first, then a (statistics not used) + +ALTER TABLE tab ADD STATISTICS a, b TYPE tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b; +INSERT INTO tab select number, -number FROM system.numbers LIMIT 10000; +SELECT 'After add and materialize statistic'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +OPTIMIZE TABLE tab FINAL; +SELECT 'After merge'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then b (statistics used) + +ALTER TABLE tab RENAME COLUMN b TO c; +SELECT 'After rename'; +SELECT replaceRegexpAll(explain, '__table1\.', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 10 and a < 10) WHERE explain LIKE '%Prewhere%'; -- checks a first, then c (statistics used) + +DROP TABLE IF EXISTS tab; From ee433684ddc4614a5bb93dbfd1e3b481a2f343d6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 7 Aug 2024 10:55:16 +0000 Subject: [PATCH 127/132] Follow-up to ClickHouse#63898, pt. III --- .../02421_simple_queries_for_opentelemetry.sh | 10 +++---- .../02458_insert_select_progress_tcp.sh | 2 +- .../02476_analyzer_identifier_hints.sh | 8 +++--- .../0_stateless/02480_tets_show_full.sh | 2 +- .../0_stateless/02482_load_parts_refcounts.sh | 2 +- .../02496_remove_redundant_sorting.sh | 6 ++--- .../02497_storage_file_reader_selection.sh | 2 +- .../02500_remove_redundant_distinct.sh | 6 ++--- ...atabase_replicated_no_arguments_for_rmt.sh | 4 +-- .../0_stateless/02532_send_logs_level_test.sh | 2 +- .../02555_davengers_rename_chain.sh | 8 +++--- ...02572_query_views_log_background_thread.sh | 8 +++--- .../02703_max_local_read_bandwidth.sh | 4 +-- .../02703_max_local_write_bandwidth.sh | 4 +-- .../0_stateless/02704_max_backup_bandwidth.sh | 4 +-- .../0_stateless/02724_limit_num_mutations.sh | 8 +++--- .../02725_async_insert_table_setting.sh | 4 +-- .../queries/0_stateless/02841_local_assert.sh | 6 ++--- ...rge_across_partitions_final_with_lonely.sh | 6 ++--- .../02871_clickhouse_client_restart_pager.sh | 2 +- .../02875_clickhouse_local_multiquery.sh | 4 +-- .../02875_merge_engine_set_index.sh | 4 +-- ...tion_table_with_explicit_insert_columns.sh | 2 +- ...ture_from_insertion_table_with_defaults.sh | 2 +- .../02883_named_collections_override.sh | 2 +- .../02884_async_insert_native_protocol_1.sh | 4 +-- .../02884_async_insert_native_protocol_2.sh | 4 +-- .../02884_async_insert_native_protocol_3.sh | 4 +-- .../02884_async_insert_native_protocol_4.sh | 4 +-- .../02885_ephemeral_columns_from_file.sh | 2 +- .../0_stateless/02895_npy_output_format.sh | 2 +- ...ak_memory_usage_http_headers_regression.sh | 2 +- .../02903_empty_order_by_throws_error.sh | 4 +-- .../02903_rmt_retriable_merge_exception.sh | 6 ++--- ...904_empty_order_by_with_setting_enabled.sh | 8 +++--- .../02907_backup_mv_with_no_inner_table.sh | 8 +++--- .../02907_backup_mv_with_no_source_table.sh | 12 ++++----- .../02907_backup_restore_default_nullable.sh | 4 +-- .../02907_backup_restore_flatten_nested.sh | 8 +++--- .../02907_clickhouse_dictionary_bug.sh | 2 +- .../02907_system_backups_profile_events.sh | 6 ++--- .../0_stateless/02908_Npy_files_caching.sh | 4 +-- .../0_stateless/02908_table_ttl_dependency.sh | 6 ++--- .../02909_settings_in_json_schema_cache.sh | 2 +- .../02915_input_table_function_in_subquery.sh | 2 +- .../02915_lazy_loading_of_base_backups.sh | 26 +++++++++---------- .../0_stateless/02916_dictionary_access.sh | 8 +++--- .../0_stateless/02916_joinget_dependency.sh | 6 ++--- .../02930_client_file_log_comment.sh | 2 +- ...lumn_use_structure_from_insertion_table.sh | 2 +- .../02940_system_stacktrace_optimizations.sh | 6 ++--- ..._alter_metadata_merge_checksum_mismatch.sh | 8 +++--- .../02947_merge_tree_index_table_3.sh | 4 +-- ...2950_dictionary_ssd_cache_short_circuit.sh | 2 +- .../02950_distributed_initial_query_event.sh | 2 +- .../02974_backup_query_format_null.sh | 4 +-- ...ert_select_resize_to_max_insert_threads.sh | 2 +- .../03008_deduplication_random_setttings.sh | 6 ++--- .../03008_local_plain_rewritable.sh | 14 +++++----- .../03031_clickhouse_local_input.sh | 8 +++--- .../0_stateless/03032_async_backup_restore.sh | 4 +-- .../03096_http_interface_role_query_param.sh | 4 +-- ...03140_client_subsequent_external_tables.sh | 2 +- .../03143_prewhere_profile_events.sh | 12 ++++----- .../03145_non_loaded_projection_backup.sh | 12 ++++----- .../03155_test_move_to_prewhere.sh | 4 +-- .../03156_default_multiquery_split.sh | 2 +- .../0_stateless/03169_time_virtual_column.sh | 2 +- .../03173_parallel_replicas_join_bug.sh | 2 +- .../03198_settings_in_csv_tsv_schema_cache.sh | 8 +++--- .../03198_unload_primary_key_outdated.sh | 4 +-- .../03199_dictionary_table_access.sh | 8 +++--- ...s_to_read_for_schema_inference_in_cache.sh | 2 +- 73 files changed, 186 insertions(+), 186 deletions(-) diff --git a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh index 98b571c5968..91e85eabcb8 100755 --- a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh +++ b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # $2 - query function execute_query() { - ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=1 --query_id $1 -nq " + ${CLICKHOUSE_CLIENT} --opentelemetry_start_trace_probability=1 --query_id $1 -q " ${2} " } @@ -18,7 +18,7 @@ function execute_query() # so we only to check the db.statement only function check_query_span_query_only() { -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT attribute['db.statement'] as query FROM system.opentelemetry_span_log @@ -31,7 +31,7 @@ ${CLICKHOUSE_CLIENT} -nq " function check_query_span() { -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; SELECT attribute['db.statement'] as query, attribute['clickhouse.read_rows'] as read_rows, @@ -47,7 +47,7 @@ ${CLICKHOUSE_CLIENT} -nq " # # Set up # -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.opentelemetry_test; CREATE TABLE ${CLICKHOUSE_DATABASE}.opentelemetry_test (id UInt64) Engine=MergeTree Order By id; " @@ -79,4 +79,4 @@ check_query_span $query_id # ${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.opentelemetry_test; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh index ae3ea017fbb..178da822d41 100755 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists insert_select_progress_tcp; create table insert_select_progress_tcp(s UInt16) engine = MergeTree order by s; " diff --git a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh index 4c850a6ec9e..92f519a9f8a 100755 --- a/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh +++ b/tests/queries/0_stateless/02476_analyzer_identifier_hints.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table; CREATE TABLE test_table ( @@ -74,7 +74,7 @@ $CLICKHOUSE_CLIENT -q "SELECT 1 AS constant_value, arrayMap(lambda_argument -> l $CLICKHOUSE_CLIENT -q "WITH 1 AS constant_value SELECT (SELECT constant_valu) SETTINGS enable_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['constant_value'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table_compound; CREATE TABLE test_table_compound ( @@ -142,7 +142,7 @@ $CLICKHOUSE_CLIENT -q "SELECT cast(tuple(1), 'Tuple(value_1 String)') AS constan $CLICKHOUSE_CLIENT -q "WITH cast(tuple(1), 'Tuple(value_1 String)') AS constant_value SELECT (SELECT constant_value.value_) SETTINGS enable_analyzer = 1;" 2>&1 \ | grep "Maybe you meant: \['constant_value.value_1'\]" &>/dev/null; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS test_table_1; CREATE TABLE test_table_1 ( @@ -185,7 +185,7 @@ $CLICKHOUSE_CLIENT -q "SELECT ((1))::Tuple(a Tuple(b UInt32)) AS t, t.a.c SETTIN $CLICKHOUSE_CLIENT -q "SELECT 1"; -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE test_table; DROP TABLE test_table_compound; DROP TABLE test_table_1; diff --git a/tests/queries/0_stateless/02480_tets_show_full.sh b/tests/queries/0_stateless/02480_tets_show_full.sh index 5f5040ba128..50184857a1f 100755 --- a/tests/queries/0_stateless/02480_tets_show_full.sh +++ b/tests/queries/0_stateless/02480_tets_show_full.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) database=$($CLICKHOUSE_CLIENT -q 'SELECT currentDatabase()') -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS test_02480_table; DROP VIEW IF EXISTS test_02480_view; CREATE TABLE test_02480_table (id Int64) ENGINE=MergeTree ORDER BY id; diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 5303824d97c..4dc7a7fd99b 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS load_parts_refcounts SYNC; CREATE TABLE load_parts_refcounts (id UInt32) diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index c9bd242e429..6e132c55628 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -26,15 +26,15 @@ FROM ORDER BY number DESC ) ORDER BY number ASC" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;$1" } echo "-- Enabled query_plan_remove_redundant_sorting" diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index aa43e81f131..27243dd47fa 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -13,6 +13,6 @@ $CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SEL $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -q "CreatedReadBufferMMap" && echo 'Fail' || echo 0 $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -q "CreatedReadBufferOrdinary" && echo 1 || echo 'Fail' -$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -nq "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" +$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -q "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 3c06119e8d2..6fd42fa940a 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -24,15 +24,15 @@ FROM ) )" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN $query" +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;EXPLAIN $query" function run_query { echo "-- query" echo "$1" echo "-- explain" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN $1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;EXPLAIN $1" echo "-- execute" - $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" + $CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;$1" } echo "-- Enabled $OPTIMIZATION_SETTING" diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh index a0f228e6af4..c1aa24943c1 100755 --- a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -14,8 +14,8 @@ ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings databa ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 506ac2331f2..a50539311cb 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; insert into data values (1); diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.sh b/tests/queries/0_stateless/02555_davengers_rename_chain.sh index 660a95846c4..196507dc72e 100755 --- a/tests/queries/0_stateless/02555_davengers_rename_chain.sh +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.sh @@ -46,7 +46,7 @@ tables["wrong_metadata_compact"]="min_bytes_for_wide_part = 10000000" for table in "${!tables[@]}"; do settings="${tables[$table]}" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS $table; CREATE TABLE $table( @@ -69,7 +69,7 @@ for table in "${!tables[@]}"; do wait_column "$table" "\`a1\` UInt64" || exit 2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; @@ -82,7 +82,7 @@ for table in "${!tables[@]}"; do wait_mutation_loaded "$table" "b1 TO a" || exit 2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; @@ -94,7 +94,7 @@ for table in "${!tables[@]}"; do wait_for_all_mutations "$table" - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" -- { echoOn } SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh index 509cd03f6c2..22b94e09b58 100755 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; +${CLICKHOUSE_CLIENT} --ignore-error --query "drop table if exists buffer_02572; drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" ${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" @@ -21,7 +21,7 @@ ${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" if [ $(( $(date +%s) - start )) -gt 6 ]; then # clickhouse test cluster is overloaded, will skip # ensure that the flush was not direct - ${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + ${CLICKHOUSE_CLIENT} --ignore-error --query "select * from data_02572; select * from copy_02572;" fi # we cannot use OPTIMIZE, this will attach query context, so let's wait @@ -31,11 +31,11 @@ for _ in {1..100}; do done -${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" +${CLICKHOUSE_CLIENT} --ignore-error --query "select * from data_02572; select * from copy_02572;" ${CLICKHOUSE_CLIENT} --query="system flush logs;" ${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) from system.query_views_log where view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3;" \ No newline at end of file + group by 2, 3;" diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index 03e0f363d71..79253648475 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -26,7 +26,7 @@ read_methods=( for read_method in "${read_methods[@]}"; do query_id=$(random_str 10) $CLICKHOUSE_CLIENT --query_id "$query_id" -q "select * from data format Null settings max_local_read_bandwidth='1M', local_filesystem_read_method='$read_method'" - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT '$read_method', diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 4f6a300c5b3..c5776134673 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -nm -q " query_id=$(random_str 10) # writes 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds $CLICKHOUSE_CLIENT --query_id "$query_id" -q "insert into data select * from numbers(1e6) settings max_local_write_bandwidth='1M'" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT query_duration_ms >= 7e3, diff --git a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh index 8cb03a93a7a..7e914c4c539 100755 --- a/tests/queries/0_stateless/02704_max_backup_bandwidth.sh +++ b/tests/queries/0_stateless/02704_max_backup_bandwidth.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data; create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9; " @@ -15,7 +15,7 @@ $CLICKHOUSE_CLIENT -q "insert into data select * from numbers(1e6)" query_id=$(random_str 10) $CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to Disk('backups', '$CLICKHOUSE_DATABASE/data/backup1')" --max_backup_bandwidth=1M > /dev/null -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT query_duration_ms >= 7e3, diff --git a/tests/queries/0_stateless/02724_limit_num_mutations.sh b/tests/queries/0_stateless/02724_limit_num_mutations.sh index 60888db0e2e..604cc9ff08e 100755 --- a/tests/queries/0_stateless/02724_limit_num_mutations.sh +++ b/tests/queries/0_stateless/02724_limit_num_mutations.sh @@ -23,7 +23,7 @@ function wait_for_alter() done } -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_limit_mutations SYNC; CREATE TABLE t_limit_mutations (id UInt64, v UInt64) @@ -48,14 +48,14 @@ SELECT count() FROM system.mutations WHERE database = currentDatabase() AND tabl SHOW CREATE TABLE t_limit_mutations; " -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " ALTER TABLE t_limit_mutations UPDATE v = 6 WHERE 1 SETTINGS number_of_mutations_to_throw = 100; ALTER TABLE t_limit_mutations MODIFY COLUMN v String SETTINGS number_of_mutations_to_throw = 100, alter_sync = 0; " wait_for_alter "String" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_limit_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; SHOW CREATE TABLE t_limit_mutations; @@ -65,7 +65,7 @@ ${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_limit_mutations" wait_for_mutation "t_limit_mutations" "0000000003" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_limit_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_limit_mutations' AND NOT is_done; SHOW CREATE TABLE t_limit_mutations; diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.sh b/tests/queries/0_stateless/02725_async_insert_table_setting.sh index 13911e8d677..14c2d335275 100755 --- a/tests/queries/0_stateless/02725_async_insert_table_setting.sh +++ b/tests/queries/0_stateless/02725_async_insert_table_setting.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_mt_async_insert; DROP TABLE IF EXISTS t_mt_sync_insert; @@ -19,7 +19,7 @@ url="${CLICKHOUSE_URL}&async_insert=0&wait_for_async_insert=1" ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_async_insert VALUES (1, 'aa'), (2, 'bb')" ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_sync_insert VALUES (1, 'aa'), (2, 'bb')" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT count() FROM t_mt_async_insert; SELECT count() FROM t_mt_sync_insert; diff --git a/tests/queries/0_stateless/02841_local_assert.sh b/tests/queries/0_stateless/02841_local_assert.sh index a167c09da1f..dc49007b0f6 100755 --- a/tests/queries/0_stateless/02841_local_assert.sh +++ b/tests/queries/0_stateless/02841_local_assert.sh @@ -7,12 +7,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo "create table test (x UInt64) engine=Memory; -insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -nm +insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS}" | $CLICKHOUSE_LOCAL -m echo "create table test (x UInt64) engine=Memory; -insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -nm --ignore-error +insert into test from infile 'data';" | $CLICKHOUSE_LOCAL -m --ignore-error echo "create table test (x UInt64) engine=Memory; insert into test from infile 'data'; -- {clientError BAD_ARGUMENTS} -select 1" | $CLICKHOUSE_LOCAL -nm +select 1" | $CLICKHOUSE_LOCAL -m diff --git a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh index 4bc29ce4233..be0ef4e2648 100755 --- a/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh +++ b/tests/queries/0_stateless/02868_no_merge_across_partitions_final_with_lonely.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm -q """ +${CLICKHOUSE_CLIENT} -m -q """ DROP TABLE IF EXISTS with_lonely; CREATE TABLE with_lonely @@ -23,7 +23,7 @@ ORDER BY (id); """ create_optimize_partition() { - ${CLICKHOUSE_CLIENT} -nm -q """ + ${CLICKHOUSE_CLIENT} -m -q """ INSERT INTO with_lonely SELECT number, '$1', number*10, 0 FROM numbers(10); INSERT INTO with_lonely SELECT number+500000, '$1', number*10, 1 FROM numbers(10); """ @@ -39,7 +39,7 @@ create_optimize_partition "2022-10-29" create_optimize_partition "2022-10-30" create_optimize_partition "2022-10-31" -${CLICKHOUSE_CLIENT} -nm -q """ +${CLICKHOUSE_CLIENT} -m -q """ SYSTEM STOP MERGES with_lonely; INSERT INTO with_lonely SELECT number, '2022-11-01', number*10, 0 FROM numbers(10); diff --git a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh index cc4ce9b122e..418e439e44b 100755 --- a/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh +++ b/tests/queries/0_stateless/02871_clickhouse_client_restart_pager.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # head by default print 10 rows, but it is not enough to query 11 rows, since # we need to overflow the default pipe size, hence just 1 million of rows (it # should be around 6 MiB in text representation, should be definitelly enough). -$CLICKHOUSE_CLIENT --ignore-error -nm --pager head -q " +$CLICKHOUSE_CLIENT --ignore-error -m --pager head -q " select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR } select * from numbers(1e6); -- { clientError CANNOT_WRITE_TO_FILE_DESCRIPTOR } " diff --git a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh index 3f2b732e71b..3a7d861262e 100755 --- a/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh +++ b/tests/queries/0_stateless/02875_clickhouse_local_multiquery.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "select 1; select 2;" $CLICKHOUSE_LOCAL -q "select 1; select 2;" # -n is a no-op -$CLICKHOUSE_CLIENT -n -q "select 1; select 2;" -$CLICKHOUSE_LOCAL -n -q "select 1; select 2;" +$CLICKHOUSE_CLIENT -q "select 1; select 2;" +$CLICKHOUSE_LOCAL -q "select 1; select 2;" exit 0 diff --git a/tests/queries/0_stateless/02875_merge_engine_set_index.sh b/tests/queries/0_stateless/02875_merge_engine_set_index.sh index 355d83167a6..f40696c31a9 100755 --- a/tests/queries/0_stateless/02875_merge_engine_set_index.sh +++ b/tests/queries/0_stateless/02875_merge_engine_set_index.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " CREATE TABLE t1 ( a UInt32, @@ -57,7 +57,7 @@ ORDER BY b DESC FORMAT Null;" -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectedMarks'] diff --git a/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh b/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh index 8bdaa47c111..dd08724456b 100755 --- a/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh +++ b/tests/queries/0_stateless/02878_use_structure_from_insertion_table_with_explicit_insert_columns.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL -q "select 42 as x format Native" > $CLICKHOUSE_TEST_UNIQUE_NAME.native -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64, y UInt64) engine=Memory; insert into test (x) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); insert into test (y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); diff --git a/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh b/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh index 315bbcd544f..c7270b65e19 100755 --- a/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh +++ b/tests/queries/0_stateless/02879_use_structure_from_insertion_table_with_defaults.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh $CLICKHOUSE_LOCAL -q "select 1 as x format Native" > $CLICKHOUSE_TEST_UNIQUE_NAME.native -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64, y UInt64 default 42) engine=Memory; insert into test select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.native'); select * from test; diff --git a/tests/queries/0_stateless/02883_named_collections_override.sh b/tests/queries/0_stateless/02883_named_collections_override.sh index a08c795127d..915ce280226 100755 --- a/tests/queries/0_stateless/02883_named_collections_override.sh +++ b/tests/queries/0_stateless/02883_named_collections_override.sh @@ -8,7 +8,7 @@ u1="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection1" u2="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection2" u3="${CLICKHOUSE_TEST_UNIQUE_NAME}_collection3" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP NAMED COLLECTION IF EXISTS $u1; DROP NAMED COLLECTION IF EXISTS $u2; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh index 7f583087336..791515c82d6 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_1.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_1; CREATE TABLE t_async_insert_native_1 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -22,7 +22,7 @@ echo '{"id": 1, "s": "aaa"}' \ | $CLICKHOUSE_CLIENT $async_insert_options -q 'INSERT INTO t_async_insert_native_1 FORMAT JSONEachRow {"id": 2, "s": "bbb"}' 2>&1 \ | grep -o "NOT_IMPLEMENTED" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT sum(length(entries.bytes)) FROM system.asynchronous_inserts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_native_1'; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh index b9b1854eaef..a8a9209ee68 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_2.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_2; CREATE TABLE t_async_insert_native_2 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -18,7 +18,7 @@ echo "(3, 'ccc') (4, 'ddd') (5, 'eee')" | $CLICKHOUSE_CLIENT $async_insert_optio wait -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT * FROM t_async_insert_native_2 ORDER BY id; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh index c9d399607d0..229f13eb821 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_3.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_3; CREATE TABLE t_async_insert_native_3 (id UInt64, s String) ENGINE = MergeTree ORDER BY id; " @@ -21,7 +21,7 @@ $CLICKHOUSE_CLIENT $async_insert_options -q "INSERT INTO t_async_insert_native_3 wait -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT format, length(entries.bytes) FROM system.asynchronous_inserts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_async_insert_native_3' ORDER BY format; diff --git a/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh b/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh index 9118c11315c..e84c1ca8899 100755 --- a/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh +++ b/tests/queries/0_stateless/02884_async_insert_native_protocol_4.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_async_insert_native_4; CREATE TABLE t_async_insert_native_4 (id UInt64) ENGINE = MergeTree ORDER BY id; " @@ -20,7 +20,7 @@ echo "(2) (3) (4) (5)" | $CLICKHOUSE_CLIENT_WITH_LOG $async_insert_options --asy -q 'INSERT INTO t_async_insert_native_4 FORMAT Values' 2>&1 \ | grep -c "too much data" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " SELECT * FROM t_async_insert_native_4 ORDER BY id; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh index 2917ec86957..065658d4d56 100755 --- a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh +++ b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', a $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Alias y, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS" $CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Materialized 42, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -n -q " +$CLICKHOUSE_LOCAL -q " create table test (x UInt64 Ephemeral, y UInt64 default x + 1) engine=Memory; insert into test (x, y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl'); select * from test; diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh index a364e447062..74000bc298f 100755 --- a/tests/queries/0_stateless/02895_npy_output_format.sh +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -9,7 +9,7 @@ mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -${CLICKHOUSE_CLIENT} -n -q --ignore-error " +${CLICKHOUSE_CLIENT} -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895; CREATE DATABASE IF NOT EXISTS npy_output_02895; diff --git a/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh b/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh index d6775927f35..b4656c9e321 100755 --- a/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh +++ b/tests/queries/0_stateless/02895_peak_memory_usage_http_headers_regression.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS data; DROP TABLE IF EXISTS data2; DROP VIEW IF EXISTS mv1; diff --git a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh index 64f5dd1a987..ef631d9ed1b 100755 --- a/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh +++ b/tests/queries/0_stateless/02903_empty_order_by_throws_error.sh @@ -5,13 +5,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # setting disabled and no order by or primary key; expect error -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; " 2>&1 \ | grep -F -q "You must provide an ORDER BY or PRIMARY KEY expression in the table definition." && echo 'OK' || echo 'FAIL' # setting disabled and primary key in table definition -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() PRIMARY KEY a SETTINGS index_granularity = 8192; SHOW CREATE TABLE test_empty_order_by; diff --git a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh index 5065da371a8..b77e5b0b402 100755 --- a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh +++ b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh @@ -15,7 +15,7 @@ if [[ $($CLICKHOUSE_CLIENT -q "select count()>0 from system.clusters where clust cluster=test_cluster_database_replicated fi -$CLICKHOUSE_CLIENT -nm --distributed_ddl_output_mode=none -q " +$CLICKHOUSE_CLIENT -m --distributed_ddl_output_mode=none -q " drop table if exists rmt1; drop table if exists rmt2; @@ -46,7 +46,7 @@ part_name='%' # wait while there be at least one 'No active replica has part all_0_1_1 or covering part' in logs for _ in {0..50}; do - no_active_repilica_messages=$($CLICKHOUSE_CLIENT -nm -q " + no_active_repilica_messages=$($CLICKHOUSE_CLIENT -m -q " system flush logs; select count() @@ -65,7 +65,7 @@ for _ in {0..50}; do sleep 1 done -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system start pulling replication log rmt2; system flush logs; diff --git a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh index 7ac9b488be5..5f9dc6ea077 100755 --- a/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh +++ b/tests/queries/0_stateless/02904_empty_order_by_with_setting_enabled.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # setting enabled and no order by or primary key -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8) ENGINE = MergeTree() SETTINGS index_granularity = 8192; @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY tuple()" && echo 'OK' || echo 'FAIL' # setting enabled and per-column primary key -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8 PRIMARY KEY, b String PRIMARY KEY) ENGINE = MergeTree() SETTINGS index_granularity = 8192; @@ -21,7 +21,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY (a, b)" && echo 'OK' || echo 'FAIL' # setting enabled and primary key in table definition (not per-column or order by) -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() PRIMARY KEY (a) SETTINGS index_granularity = 8192; @@ -29,7 +29,7 @@ ${CLICKHOUSE_CLIENT} -n --query=" " 2>&1 \ | grep -F -q "ORDER BY a" && echo 'OK' || echo 'FAIL' # setting enabled and order by in table definition (no primary key) -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET create_table_empty_primary_key_by_default = true; DROP TABLE IF EXISTS test_empty_order_by; CREATE TABLE test_empty_order_by(a UInt8, b String) ENGINE = MergeTree() ORDER BY (a, b) SETTINGS index_granularity = 8192; diff --git a/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh b/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh index 30ec50fa20f..e37f1e51c74 100755 --- a/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh +++ b/tests/queries/0_stateless/02907_backup_mv_with_no_inner_table.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -15,14 +15,14 @@ create materialized view mv (a Int32) engine = MergeTree() order by tuple() as s uuid=$(${CLICKHOUSE_CLIENT} --query "select uuid from system.tables where table='mv' and database == currentDatabase()") inner_table=".inner_id.${uuid}" -${CLICKHOUSE_CLIENT} -nm --query "drop table \`$inner_table\` sync" +${CLICKHOUSE_CLIENT} -m --query "drop table \`$inner_table\` sync" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " set send_logs_level = 'error'; backup table ${CLICKHOUSE_DATABASE}.\`mv\` to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; restore table ${CLICKHOUSE_DATABASE}.\`mv\` from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "RESTORED" diff --git a/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh b/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh index d59ebe400ee..f950954941f 100755 --- a/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh +++ b/tests/queries/0_stateless/02907_backup_mv_with_no_source_table.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -15,18 +15,18 @@ drop table if exists mv; create materialized view mv to dst (a Int32) as select * from src; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table src; backup database ${CLICKHOUSE_DATABASE} on cluster test_shard_localhost to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; set allow_deprecated_database_ordinary=1; restore table ${CLICKHOUSE_DATABASE}.mv on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); " | grep -o "RESTORED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists src; create table src (a Int32) engine = MergeTree() order by tuple(); @@ -37,13 +37,13 @@ drop table if exists mv; create materialized view mv to dst (a Int32) as select * from src; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table src; drop table dst; backup database ${CLICKHOUSE_DATABASE} on cluster test_shard_localhost to Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table mv; set allow_deprecated_database_ordinary=1; restore table ${CLICKHOUSE_DATABASE}.mv on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); diff --git a/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh b/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh index 8ed36a7edd7..dc5793d1638 100755 --- a/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh +++ b/tests/queries/0_stateless/02907_backup_restore_default_nullable.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; set data_type_default_nullable = 0; create table test (test String) ENGINE = MergeTree() ORDER BY tuple(); @@ -13,7 +13,7 @@ backup table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost to Disk ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test sync; set data_type_default_nullable = 1; restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh index 742d24a97eb..eae307add10 100755 --- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh +++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; set flatten_nested = 0; create table test (test Array(Tuple(foo String, bar Float64))) ENGINE = MergeTree() ORDER BY tuple(); @@ -13,7 +13,7 @@ backup table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost to Disk ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test2; set flatten_nested = 0; create table test2 (test Nested(foo String, bar Float64)) ENGINE = MergeTree() ORDER BY tuple(); @@ -22,7 +22,7 @@ backup table ${CLICKHOUSE_DATABASE}.test2 on cluster test_shard_localhost to Dis ${CLICKHOUSE_CLIENT} --query "show create table test2" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test sync; set flatten_nested = 1; restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}'); @@ -30,7 +30,7 @@ restore table ${CLICKHOUSE_DATABASE}.test on cluster test_shard_localhost from D ${CLICKHOUSE_CLIENT} --query "show create table test" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table test2 sync; set flatten_nested = 1; restore table ${CLICKHOUSE_DATABASE}.test2 on cluster test_shard_localhost from Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}2'); diff --git a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh index 57182050534..2cad15c6fcb 100755 --- a/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh +++ b/tests/queries/0_stateless/02907_clickhouse_dictionary_bug.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " DROP DICTIONARY IF EXISTS 02907_dictionary; DROP TABLE IF EXISTS 02907_table; diff --git a/tests/queries/0_stateless/02907_system_backups_profile_events.sh b/tests/queries/0_stateless/02907_system_backups_profile_events.sh index 801056a2844..9a1d5a3db11 100755 --- a/tests/queries/0_stateless/02907_system_backups_profile_events.sh +++ b/tests/queries/0_stateless/02907_system_backups_profile_events.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " drop table if exists test; create table test (a Int32) engine = MergeTree() order by tuple(); " @@ -12,10 +12,10 @@ create table test (a Int32) engine = MergeTree() order by tuple(); backup_id=${CLICKHOUSE_TEST_UNIQUE_NAME} backup_name="Disk('backups', '$backup_id')"; -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " backup table ${CLICKHOUSE_DATABASE}.test to $backup_name; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select ProfileEvents['BackupEntriesCollectorMicroseconds'] > 10 from system.backups where name='Disk(\'backups\', \'$backup_id\')' " diff --git a/tests/queries/0_stateless/02908_Npy_files_caching.sh b/tests/queries/0_stateless/02908_Npy_files_caching.sh index 4845f740972..218e13efb95 100755 --- a/tests/queries/0_stateless/02908_Npy_files_caching.sh +++ b/tests/queries/0_stateless/02908_Npy_files_caching.sh @@ -7,13 +7,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy') settings optimize_count_from_files=1" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/one_dim.npy', auto, 'array Int64') settings optimize_count_from_files=1" -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " desc file('$CURDIR/data_npy/one_dim.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; " $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/npy_big.npy') settings optimize_count_from_files=0" $CLICKHOUSE_LOCAL -q "select count() from file('$CURDIR/data_npy/npy_big.npy') settings optimize_count_from_files=1" -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " desc file('$CURDIR/data_npy/npy_big.npy'); select number_of_rows from system.schema_inference_cache where format='Npy'; " diff --git a/tests/queries/0_stateless/02908_table_ttl_dependency.sh b/tests/queries/0_stateless/02908_table_ttl_dependency.sh index 70136b4a42b..0bc02426f61 100755 --- a/tests/queries/0_stateless/02908_table_ttl_dependency.sh +++ b/tests/queries/0_stateless/02908_table_ttl_dependency.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS 02908_dependent; DROP TABLE IF EXISTS 02908_main; @@ -14,11 +14,11 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE 02908_dependent (a UInt32, ts DateTime) ENGINE = MergeTree ORDER BY a TTL ts + 1 WHERE a IN (SELECT a FROM ${CLICKHOUSE_DATABASE}.02908_main); " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE 02908_main; " 2>&1 | grep -F -q "HAVE_DEPENDENT_OBJECTS" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE 02908_dependent; DROP TABLE 02908_main; " diff --git a/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh b/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh index 8da144f90ca..75d491642ea 100755 --- a/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh +++ b/tests/queries/0_stateless/02909_settings_in_json_schema_cache.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS schema_inference_make_columns_nullable=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS schema_inference_make_columns_nullable=0; SELECT count() from system.schema_inference_cache where format = 'JSON' and additional_format_info like '%schema_inference_make_columns_nullable%';" diff --git a/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh b/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh index 80e38338751..7ad38e11e96 100755 --- a/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh +++ b/tests/queries/0_stateless/02915_input_table_function_in_subquery.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " CREATE TABLE IF NOT EXISTS ts_data_double_raw ( device_id UInt32 NOT NULL CODEC(ZSTD), diff --git a/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh b/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh index 5f0f41a956b..b6d6ca57768 100755 --- a/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh +++ b/tests/queries/0_stateless/02915_lazy_loading_of_base_backups.sh @@ -13,40 +13,40 @@ b_backup="Disk('backups', '$b_backup_id')" c_backup_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_c c_backup="Disk('backups', '$c_backup_id')" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS tbl1; DROP TABLE IF EXISTS tbl2; DROP TABLE IF EXISTS tbl3; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl1 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must write backup 'a'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $a_backup SETTINGS id='$a_backup_id'; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl2 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must read backup 'a' and write backup 'b'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $b_backup SETTINGS id='$b_backup_id', base_backup=$a_backup; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE TABLE tbl3 (a Int32) ENGINE = MergeTree() ORDER BY tuple(); " # The following BACKUP command must read only backup 'b' (and not 'a') and write backup 'c'. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " BACKUP DATABASE ${CLICKHOUSE_DATABASE} TO $c_backup SETTINGS id='$c_backup_id', base_backup=$b_backup; " | grep -o "BACKUP_CREATED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl1; DROP TABLE tbl2; DROP TABLE tbl3; @@ -57,28 +57,28 @@ r2_restore_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_r2 r3_restore_id=${CLICKHOUSE_TEST_UNIQUE_NAME}_r3 # The following RESTORE command must read all 3 backups 'a', 'b', c' because the table 'tbl1' was in the first backup. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl1 FROM $c_backup SETTINGS id='$r1_restore_id'; " | grep -o "RESTORED" # The following RESTORE command must read only 2 backups 'b', c' (and not 'a') because the table 'tbl2' was in the second backup. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl2 FROM $c_backup SETTINGS id='$r2_restore_id'; " | grep -o "RESTORED" # The following RESTORE command must read only 1 backup 'c' (and not 'a' or 'b') because the table 'tbl3' was in the third backup. -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " RESTORE TABLE ${CLICKHOUSE_DATABASE}.tbl3 FROM $c_backup SETTINGS id='$r3_restore_id'; " | grep -o "RESTORED" all_ids="['$a_backup_id', '$b_backup_id', '$c_backup_id', '$r1_restore_id', '$r2_restore_id', '$r3_restore_id']" id_prefix_len=`expr "${CLICKHOUSE_TEST_UNIQUE_NAME}_" : '.*'` -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " SELECT substr(id, 1 + $id_prefix_len) as short_id, ProfileEvents['BackupsOpenedForRead'], ProfileEvents['BackupsOpenedForWrite'] FROM system.backups WHERE id IN ${all_ids} ORDER BY short_id " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl1; DROP TABLE tbl2; DROP TABLE tbl3; diff --git a/tests/queries/0_stateless/02916_dictionary_access.sh b/tests/queries/0_stateless/02916_dictionary_access.sh index 08ee517ab3b..be62cc027ef 100755 --- a/tests/queries/0_stateless/02916_dictionary_access.sh +++ b/tests/queries/0_stateless/02916_dictionary_access.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} ( id UInt64, @@ -23,15 +23,15 @@ ${CLICKHOUSE_CLIENT} -nm --query " SELECT dictGet(${dictname}, 'value', 1); " -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM dictionary(${dictname}); " 2>&1 | grep -o ACCESS_DENIED | uniq -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT dictGet(${dictname}, 'value', 1); " 2>&1 | grep -o ACCESS_DENIED | uniq -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; " diff --git a/tests/queries/0_stateless/02916_joinget_dependency.sh b/tests/queries/0_stateless/02916_joinget_dependency.sh index 6477ae8c967..ff9332cb57f 100755 --- a/tests/queries/0_stateless/02916_joinget_dependency.sh +++ b/tests/queries/0_stateless/02916_joinget_dependency.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # We test the dependency on the DROP -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS Sub_distributed; DROP TABLE IF EXISTS Sub; DROP TABLE IF EXISTS Mapping; @@ -20,8 +20,8 @@ $CLICKHOUSE_CLIENT -q " DROP TABLE Mapping; " 2>&1 | grep -cm1 "HAVE_DEPENDENT_OBJECTS" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE Sub_distributed; DROP TABLE Sub; DROP TABLE Mapping; -" \ No newline at end of file +" diff --git a/tests/queries/0_stateless/02930_client_file_log_comment.sh b/tests/queries/0_stateless/02930_client_file_log_comment.sh index 50cd587e4b5..393bffcaf59 100755 --- a/tests/queries/0_stateless/02930_client_file_log_comment.sh +++ b/tests/queries/0_stateless/02930_client_file_log_comment.sh @@ -14,7 +14,7 @@ echo -n 'select 4242' >> "$file2" $CLICKHOUSE_CLIENT --queries-file "$file1" "$file2" <<<'select 42' $CLICKHOUSE_CLIENT --log_comment foo --queries-file /dev/stdin <<<'select 424242' -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 42' and type != 'QueryStart'; select query, log_comment from system.query_log where current_database = '$CLICKHOUSE_DATABASE' and event_date >= yesterday() and query = 'select 4242' and type != 'QueryStart'; diff --git a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh index d9e4a2c8f8b..76ab56a4570 100755 --- a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh +++ b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " create table test (x UInt64, y UInt32, size UInt64) engine=Memory; insert into test select c1, c2, _size from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1; select * from test; diff --git a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh index 0e23bb6c42b..9d1faf301d3 100755 --- a/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh +++ b/tests/queries/0_stateless/02940_system_stacktrace_optimizations.sh @@ -9,12 +9,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # no message at all echo "thread = 0" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_id = 0" |& grep -F -o 'Send signal to' # send messages to some threads echo "thread != 0" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_id != 0 format Null" |& grep -F -o 'Send signal to' | grep -v 'Send signal to 0 threads (total)' # there is no thread with comm="foo", so no signals will be sent echo "thread_name = 'foo'" -$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -nm -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' +$CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=test -m -q "select * from system.stack_trace where thread_name = 'foo' format Null" |& grep -F -o 'Send signal to 0 threads (total)' diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh index 27950866e81..44af2dbf26f 100755 --- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh +++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh @@ -26,7 +26,7 @@ function wait_part() function restore_failpoints() { # restore entry error with failpoints (to avoid endless errors in logs) - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT -m -q " system enable failpoint replicated_queue_unfail_entries; system sync replica $failed_replica; system disable failpoint replicated_queue_unfail_entries; @@ -34,7 +34,7 @@ function restore_failpoints() } trap restore_failpoints EXIT -$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " +$CLICKHOUSE_CLIENT -m --insert_keeper_fault_injection_probability=0 -q " drop table if exists data_r1; drop table if exists data_r2; @@ -45,7 +45,7 @@ $CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " " # will fail ALTER_METADATA on one of replicas -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system enable failpoint replicated_queue_fail_next_entry; alter table data_r1 drop index value_idx settings alter_sync=0; -- part all_0_0_0_1 @@ -80,7 +80,7 @@ fi # This will create MERGE_PARTS, on failed replica it will be fetched from source replica (since it does not have all parts to execute merge) $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1 -$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " +$CLICKHOUSE_CLIENT -m --insert_keeper_fault_injection_probability=0 -q " insert into $success_replica (key) values (2); -- part all_2_2_0 -- Avoid 'Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet' system sync replica $success_replica pull; diff --git a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh index 6cb184cb1fe..ec699d974d4 100755 --- a/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh +++ b/tests/queries/0_stateless/02947_merge_tree_index_table_3.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) user_name="${CLICKHOUSE_DATABASE}_test_user_02947" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_merge_tree_index; DROP USER IF EXISTS $user_name; @@ -44,7 +44,7 @@ $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT arr.size $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" $CLICKHOUSE_CLIENT --user "$user_name" --password "password" -q "SELECT b.mark FROM mergeTreeIndex(currentDatabase(), t_merge_tree_index, with_marks = true)" 2>&1 | grep -m1 -o "ACCESS_DENIED" || echo "OK" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS t_merge_tree_index; DROP USER IF EXISTS $user_name; " diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh index daa9c571a5d..d06aba8a4b6 100755 --- a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE source_table ( id UInt64, diff --git a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh index 7f690a681c4..737d5c6b41e 100755 --- a/tests/queries/0_stateless/02950_distributed_initial_query_event.sh +++ b/tests/queries/0_stateless/02950_distributed_initial_query_event.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # CREATE TABLE local (x UInt8) Engine=Memory; # CREATE TABLE distributed ON CLUSTER cluster (p Date, i Int32) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), x) -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS local; DROP TABLE IF EXISTS distributed; CREATE TABLE local (x UInt8) Engine=Memory; diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.sh b/tests/queries/0_stateless/02974_backup_query_format_null.sh index ddba2f6de16..345a4f47b20 100755 --- a/tests/queries/0_stateless/02974_backup_query_format_null.sh +++ b/tests/queries/0_stateless/02974_backup_query_format_null.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS tbl; CREATE TABLE tbl (a Int32) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO tbl VALUES (2), (80), (-12345); @@ -14,7 +14,7 @@ backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" ${CLICKHOUSE_CLIENT} --query "BACKUP TABLE tbl TO ${backup_name} FORMAT Null" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE tbl; RESTORE ALL FROM ${backup_name} FORMAT Null " diff --git a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh index e65c9654c9c..7ad5a2179f9 100755 --- a/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh +++ b/tests/queries/0_stateless/02981_insert_select_resize_to_max_insert_threads.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq """ +${CLICKHOUSE_CLIENT} -q """ CREATE TABLE t1_local ( n UInt64, diff --git a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh index e9f59138177..07b99eb4e99 100755 --- a/tests/queries/0_stateless/03008_deduplication_random_setttings.sh +++ b/tests/queries/0_stateless/03008_deduplication_random_setttings.sh @@ -35,7 +35,7 @@ THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ --insert-method $insert_method \ --table-engine $engine \ @@ -48,7 +48,7 @@ $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " ) " 1>/dev/null 2>&1 && echo 'insert_several_blocks_into_table OK' || echo "FAIL: insert_several_blocks_into_table ${THIS_RUN}" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ --insert-method $insert_method \ --table-engine $engine \ @@ -61,7 +61,7 @@ $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " ) " 1>/dev/null 2>&1 && echo 'mv_generates_several_blocks OK' || echo "FAIL: mv_generates_several_blocks ${THIS_RUN}" -$CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " +$CLICKHOUSE_CLIENT --max_insert_block_size 1 -mq " $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ --insert-method $insert_method \ --table-engine $engine \ diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh index d51e180efc9..e61f9061297 100755 --- a/tests/queries/0_stateless/03008_local_plain_rewritable.sh +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query "drop table if exists 03008_test_local_mt sync" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " create table 03008_test_local_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) settings disk = disk( @@ -19,35 +19,35 @@ settings disk = disk( path = '/var/lib/clickhouse/disks/local_plain_rewritable/') " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " insert into 03008_test_local_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); insert into 03008_test_local_mt (*) select number, number, number from numbers_mt(10000); " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select count(*) from 03008_test_local_mt; select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; " ${CLICKHOUSE_CLIENT} --query "optimize table 03008_test_local_mt final;" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " alter table 03008_test_local_mt modify setting disk = '03008_local_plain_rewritable', old_parts_lifetime = 3600; select engine_full from system.tables WHERE database = currentDatabase() AND name = '03008_test_local_mt'; " | grep -c "old_parts_lifetime = 3600" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " select count(*) from 03008_test_local_mt; select (*) from 03008_test_local_mt order by tuple(a, b) limit 10; " -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " alter table 03008_test_local_mt update c = 0 where a % 2 = 1; alter table 03008_test_local_mt add column d Int64 after c; alter table 03008_test_local_mt drop column c; " 2>&1 | grep -Fq "SUPPORT_IS_DISABLED" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " truncate table 03008_test_local_mt; select count(*) from 03008_test_local_mt; " diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.sh b/tests/queries/0_stateless/03031_clickhouse_local_input.sh index 6f59e9b9703..e2f9cf48108 100755 --- a/tests/queries/0_stateless/03031_clickhouse_local_input.sh +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.sh @@ -6,15 +6,15 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) tmp_file="$CUR_DIR/$CLICKHOUSE_DATABASE.txt" echo '# foo' -$CLICKHOUSE_LOCAL --engine_file_truncate_on_insert=1 -n -q "insert into function file('$tmp_file', 'LineAsString', 'x String') select * from input('x String') format LineAsString" << "$INPUT_FILE" -$CLICKHOUSE_CLIENT --external --file="$INPUT_FILE" --name=t --structure='x String' -nm -q " +$CLICKHOUSE_CLIENT --external --file="$INPUT_FILE" --name=t --structure='x String' -m -q " select * from t; select * from t; " diff --git a/tests/queries/0_stateless/03143_prewhere_profile_events.sh b/tests/queries/0_stateless/03143_prewhere_profile_events.sh index 00daa0fe7cc..6a6b993e5f8 100755 --- a/tests/queries/0_stateless/03143_prewhere_profile_events.sh +++ b/tests/queries/0_stateless/03143_prewhere_profile_events.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS t; CREATE TABLE t(a UInt32, b UInt32, c UInt32, d UInt32) ENGINE=MergeTree ORDER BY a SETTINGS min_bytes_for_wide_part=0, min_rows_for_wide_part=0; @@ -25,7 +25,7 @@ client_opts=( --max_threads 8 ) -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_1" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 @@ -33,7 +33,7 @@ PREWHERE (b % 8192) = 42 FORMAT Null " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_2" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 @@ -42,7 +42,7 @@ PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 settings enable_multiple_prewhere_read_steps=1; " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_3" -q " SELECT * FROM t PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 @@ -51,7 +51,7 @@ PREWHERE (b % 8192) = 42 AND (c % 16384) = 42 settings enable_multiple_prewhere_read_steps=0; " -${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -nq " +${CLICKHOUSE_CLIENT} "${client_opts[@]}" --query_id "$query_id_4" -q " SELECT b, c FROM t PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 @@ -59,7 +59,7 @@ PREWHERE (b % 8192) = 42 AND (c % 8192) = 42 settings enable_multiple_prewhere_read_steps=1; " -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " SYSTEM FLUSH LOGS; -- 52503 which is 43 * number of granules, 10000000 diff --git a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh index 95aef9bbc5b..4e7b318e202 100755 --- a/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh +++ b/tests/queries/0_stateless/03145_non_loaded_projection_backup.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists tp_1; create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100) settings max_parts_to_merge_at_once=1; insert into tp_1 select number, number from numbers(3); @@ -25,7 +25,7 @@ alter table tp_1 drop projection pp; alter table tp_1 attach partition '0'; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" @@ -34,19 +34,19 @@ $CLICKHOUSE_CLIENT -q " backup table tp_1 to Disk('backups', '$backup_id'); " | grep -o "BACKUP_CREATED" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; drop table tp_1; restore table tp_1 from Disk('backups', '$backup_id'); " | grep -o "RESTORED" $CLICKHOUSE_CLIENT -q "select count() from tp_1;" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; check table tp_1" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " set send_logs_level='fatal'; drop table tp_1" diff --git a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh index b6980b3a23a..f33a6b3ef27 100755 --- a/tests/queries/0_stateless/03155_test_move_to_prewhere.sh +++ b/tests/queries/0_stateless/03155_test_move_to_prewhere.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE event_envoy ( timestamp_interval DateTime CODEC(DoubleDelta), @@ -18,7 +18,7 @@ ${CLICKHOUSE_CLIENT} -nq " INSERT INTO event_envoy SELECT now() - number, 'us-east-1', 'ch_super_fast' FROM numbers_mt(1e5); " -${CLICKHOUSE_CLIENT} -nq " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE event_envoy_remote ( timestamp_interval DateTime CODEC(DoubleDelta), diff --git a/tests/queries/0_stateless/03156_default_multiquery_split.sh b/tests/queries/0_stateless/03156_default_multiquery_split.sh index 8ba2f46b786..d849fb5a162 100755 --- a/tests/queries/0_stateless/03156_default_multiquery_split.sh +++ b/tests/queries/0_stateless/03156_default_multiquery_split.sh @@ -53,6 +53,6 @@ SELECT * FROM TEST2 ORDER BY value; DROP TABLE TEST1; DROP TABLE TEST2; EOF -$CLICKHOUSE_CLIENT -m -n < "$SQL_FILE_NAME" +$CLICKHOUSE_CLIENT -m < "$SQL_FILE_NAME" rm "$SQL_FILE_NAME" diff --git a/tests/queries/0_stateless/03169_time_virtual_column.sh b/tests/queries/0_stateless/03169_time_virtual_column.sh index fef1de8c6f2..b289f39accb 100755 --- a/tests/queries/0_stateless/03169_time_virtual_column.sh +++ b/tests/queries/0_stateless/03169_time_virtual_column.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv sleep 1 -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " select _size, (dateDiff('millisecond', _time, now()) < 600000 AND dateDiff('millisecond', _time, now()) > 0) from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv'); " rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index 7c567c0f58f..af702569794 100755 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh index ce53f467823..583257d8fd3 100755 --- a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh @@ -5,25 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo -e 'a,b,c\n1,2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%skip_first_lines%';" echo -e 'a,b,c\n"1",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_numbers_from_strings%';" echo -e 'a,b,c\n"(1,2,3)",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=0; SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_strings_from_quoted_tuples%';" echo -e 'a\tb\tc\n1\t2\t3' > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=1; DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=0; SELECT count() from system.schema_inference_cache where format = 'TSV' and additional_format_info like '%skip_first_lines%';" diff --git a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh index 4f217935123..c759cc34425 100755 --- a/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh +++ b/tests/queries/0_stateless/03198_unload_primary_key_outdated.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n " +$CLICKHOUSE_CLIENT " DROP TABLE IF EXISTS t_unload_primary_key; CREATE TABLE t_unload_primary_key (a UInt64, b UInt64) @@ -26,7 +26,7 @@ for _ in {1..100}; do sleep 0.3 done -$CLICKHOUSE_CLIENT -n " +$CLICKHOUSE_CLIENT " SELECT name, active, primary_key_bytes_in_memory FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_unload_primary_key' ORDER BY name; DROP TABLE IF EXISTS t_unload_primary_key; " diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.sh b/tests/queries/0_stateless/03199_dictionary_table_access.sh index 952b466b5da..14f017c7fbc 100755 --- a/tests/queries/0_stateless/03199_dictionary_table_access.sh +++ b/tests/queries/0_stateless/03199_dictionary_table_access.sh @@ -8,7 +8,7 @@ username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} ( id UInt64, @@ -26,15 +26,15 @@ ${CLICKHOUSE_CLIENT} -nm --query " SELECT * FROM ${dicttablename}; " -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM ${dictname}; " 2>&1 | grep -o ACCESS_DENIED | uniq -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " +$CLICKHOUSE_CLIENT -m --user="${username}" --query " SELECT * FROM ${dicttablename}; " 2>&1 | grep -o ACCESS_DENIED | uniq -${CLICKHOUSE_CLIENT} -nm --query " +${CLICKHOUSE_CLIENT} -m --query " DROP TABLE IF EXISTS ${dicttablename} SYNC; DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh index 8a77538f592..adbb0cb6de0 100755 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh echo '{"x" : 42}' > $CLICKHOUSE_TEST_UNIQUE_NAME.json -$CLICKHOUSE_LOCAL -nm -q " +$CLICKHOUSE_LOCAL -m -q " DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.json') SETTINGS input_format_max_bytes_to_read_for_schema_inference=1000; SELECT additional_format_info from system.schema_inference_cache" From 8f124710ef97801f020fb88e31bcd94529a112fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 12 Aug 2024 10:27:21 +0000 Subject: [PATCH 128/132] Remove Log engine from Kafka integration tests It doesn't work well when `thread_per_consumer` is used as writer can make readers starve when `shared_time_mutex` prefers writes over reads. --- tests/integration/test_storage_kafka/test.py | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 4b6c9922d74..52d6054c12a 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -1019,7 +1019,7 @@ def test_kafka_formats(kafka_cluster, create_query_generator): DROP TABLE IF EXISTS test.kafka_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; """.format( topic_name=topic_name, @@ -2460,7 +2460,7 @@ def test_kafka_commit_on_block_write(kafka_cluster, create_query_generator): (generate_old_create_table_query, "kafka.*Committed offset 2.*virt2_[01]"), ( generate_new_create_table_query, - r"kafka.*Saved offset 2[0-9]* for topic-partition \[virt2_[01]:[0-9]+", + r"kafka.*Saved offset 2 for topic-partition \[virt2_[01]:[0-9]+", ), ], ) @@ -2494,7 +2494,7 @@ def test_kafka_virtual_columns2(kafka_cluster, create_query_generator, log_line) f""" {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS SELECT value, _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp), toUnixTimestamp64Milli(_timestamp_ms), _headers.name, _headers.value FROM test.kafka; """ ) @@ -2729,7 +2729,7 @@ def test_kafka_produce_key_timestamp(kafka_cluster, create_query_generator, log_ DROP TABLE IF EXISTS test.consumer; {writer_create_query}; {reader_create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS SELECT key, value, inserted_key, toUnixTimestamp(inserted_timestamp), _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp) FROM test.kafka; """ ) @@ -2865,7 +2865,7 @@ def test_kafka_produce_consume_avro(kafka_cluster, create_query_generator): {writer_create_query}; {reader_create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS SELECT key, value FROM test.kafka; """ ) @@ -3537,7 +3537,7 @@ def test_bad_reschedule(kafka_cluster, create_query_generator): f""" {create_query}; - CREATE MATERIALIZED VIEW test.destination Engine=Log AS + CREATE MATERIALIZED VIEW test.destination ENGINE=MergeTree ORDER BY tuple() AS SELECT key, now() as consume_ts, @@ -3745,7 +3745,7 @@ def test_kafka_unavailable(kafka_cluster, create_query_generator, do_direct_read f""" {create_query}; - CREATE MATERIALIZED VIEW test.destination_unavailable Engine=Log AS + CREATE MATERIALIZED VIEW test.destination_unavailable ENGINE=MergeTree ORDER BY tuple() AS SELECT key, now() as consume_ts, @@ -4267,12 +4267,12 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator {create_query}; DROP TABLE IF EXISTS test.kafka_data_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_data_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_data_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name} WHERE length(_error) = 0; DROP TABLE IF EXISTS test.kafka_errors_{format_name}_mv; - CREATE MATERIALIZED VIEW test.kafka_errors_{format_name}_mv Engine=Log AS + CREATE MATERIALIZED VIEW test.kafka_errors_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS SELECT {raw_message} as raw_message, _error as error, _topic as topic, _partition as partition, _offset as offset FROM test.kafka_{format_name} WHERE length(_error) > 0; """ @@ -4796,7 +4796,7 @@ def test_max_rows_per_message(kafka_cluster, create_query_generator): DROP TABLE IF EXISTS test.kafka; {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS SELECT key, value FROM test.kafka; """ ) @@ -4875,7 +4875,7 @@ def test_row_based_formats(kafka_cluster, create_query_generator): {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); @@ -4982,7 +4982,7 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): {create_query}; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; @@ -5362,7 +5362,7 @@ def test_formats_errors(kafka_cluster): input_format_with_names_use_header=0, format_schema='key_value_message:Message'; - CREATE MATERIALIZED VIEW test.view Engine=Log AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS SELECT key, value FROM test.{table_name}; """ ) From e9d16bc0549fe2a1fc44e50bd173ebc9358ed838 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 12 Aug 2024 08:32:14 +0200 Subject: [PATCH 129/132] Use new mc restart --- docker/test/stateless/run.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 69052cf4771..c70cbe1fe45 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -195,16 +195,18 @@ ORDER BY tuple()" # use async inserts to avoid creating too many parts ./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 ./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500 + max_retries=100 retry=1 - while [ $retry -le $max_retries ]; do echo "clickminio restart attempt $retry:" - output=$(./mc admin service restart clickminio 2>&1) - echo "$output" + output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status) + echo "Output of restart status: $output" - if echo "$output" | grep -q "Restarted \`clickminio\` successfully"; then + expected_output="success +success" + if [ "$output" = "$expected_output" ]; then echo "Restarted clickminio successfully." break fi @@ -218,7 +220,6 @@ if [ $retry -gt $max_retries ]; then echo "Failed to restart clickminio after $max_retries attempts." fi -./mc admin service restart clickminio ./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! From 737948470d6f2cd69f1842396984ec17aea49b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 12 Aug 2024 11:54:34 +0000 Subject: [PATCH 130/132] Fix syntax --- tests/integration/test_storage_kafka/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 52d6054c12a..bef90e1b9d3 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -4796,7 +4796,7 @@ def test_max_rows_per_message(kafka_cluster, create_query_generator): DROP TABLE IF EXISTS test.kafka; {create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.kafka; """ ) @@ -4875,7 +4875,7 @@ def test_row_based_formats(kafka_cluster, create_query_generator): {create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); @@ -4982,7 +4982,7 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): {create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; @@ -5362,7 +5362,7 @@ def test_formats_errors(kafka_cluster): input_format_with_names_use_header=0, format_schema='key_value_message:Message'; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY key, value AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; """ ) From 34643ee16c5452a08feac63aaaea605a8c912b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 12 Aug 2024 13:30:25 +0000 Subject: [PATCH 131/132] Run test only from modified files --- tests/ci/integration_tests_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 84718462ab5..7b9e7d1f63e 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -69,9 +69,9 @@ def get_changed_tests_to_run(pr_info, repo_path): return [] for fpath in changed_files: - if "tests/integration/test_" in fpath: + if re.search("tests/integration/test_.*/test.*\.py", fpath) is not None: logging.info("File %s changed and seems like integration test", fpath) - result.add(fpath.split("/")[2]) + result.add("/".join(fpath.split("/")[2:])) return filter_existing_tests(result, repo_path) From 6cde029ed9cd6a22937193e12863974f1fa8f160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 12 Aug 2024 13:48:44 +0000 Subject: [PATCH 132/132] Fix style --- tests/ci/integration_tests_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 7b9e7d1f63e..f5dbef4f6db 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -69,7 +69,7 @@ def get_changed_tests_to_run(pr_info, repo_path): return [] for fpath in changed_files: - if re.search("tests/integration/test_.*/test.*\.py", fpath) is not None: + if re.search(r"tests/integration/test_.*/test.*\.py", fpath) is not None: logging.info("File %s changed and seems like integration test", fpath) result.add("/".join(fpath.split("/")[2:])) return filter_existing_tests(result, repo_path)