From 48d47d26a4f18e180fa5602ad0aa89cc7af234a4 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 14 Sep 2023 14:29:35 +0000 Subject: [PATCH 01/57] Add simple unit test for full sorting join --- .../Transforms/MergeJoinTransform.cpp | 71 +++++++++----- .../Transforms/MergeJoinTransform.h | 30 ++++-- .../tests/gtest_merge_join_algorithm.cpp | 95 +++++++++++++++++++ 3 files changed, 166 insertions(+), 30 deletions(-) create mode 100644 src/Processors/tests/gtest_merge_join_algorithm.cpp diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 159a3244fe9..a9fd7978249 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -270,35 +270,45 @@ bool FullMergeJoinCursor::fullyCompleted() const } MergeJoinAlgorithm::MergeJoinAlgorithm( - JoinPtr table_join_, + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, const Blocks & input_headers, size_t max_block_size_) - : table_join(table_join_) + : kind(kind_) + , strictness(strictness_) , max_block_size(max_block_size_) , log(getLogger("MergeJoinAlgorithm")) { if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); - auto strictness = table_join->getTableJoin().strictness(); if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); - auto kind = table_join->getTableJoin().kind(); if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); - const auto & join_on = table_join->getTableJoin().getOnlyClause(); - - if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) + if (on_clause_.on_filter_condition_left || 
on_clause_.on_filter_condition_right) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); cursors = { - createCursor(input_headers[0], join_on.key_names_left), - createCursor(input_headers[1], join_on.key_names_right) + createCursor(input_headers[0], on_clause_.key_names_left), + createCursor(input_headers[1], on_clause_.key_names_right) }; - for (const auto & [left_key, right_key] : table_join->getTableJoin().leftToRightKeyRemap()) +MergeJoinAlgorithm::MergeJoinAlgorithm( + JoinPtr join_ptr, + const Blocks & input_headers, + size_t max_block_size_) + : MergeJoinAlgorithm( + join_ptr->getTableJoin().kind(), + join_ptr->getTableJoin().strictness(), + join_ptr->getTableJoin().getOnlyClause(), + input_headers, + max_block_size_) +{ + for (const auto & [left_key, right_key] : join_ptr->getTableJoin().leftToRightKeyRemap()) { size_t left_idx = input_headers[0].getPositionByName(left_key); size_t right_idx = input_headers[1].getPositionByName(right_key); @@ -398,7 +408,7 @@ struct AllJoinImpl size_t lnum = nextDistinct(left_cursor.cursor); size_t rnum = nextDistinct(right_cursor.cursor); - bool all_fit_in_block = std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; + bool all_fit_in_block = !max_block_size || std::max(left_map.size(), right_map.size()) + lnum * rnum <= max_block_size; bool have_all_ranges = left_cursor.cursor.isValid() && right_cursor.cursor.isValid(); if (all_fit_in_block && have_all_ranges) { @@ -498,7 +508,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState } size_t total_rows = 0; - while (total_rows < max_block_size) + while (!max_block_size || total_rows < max_block_size) { const auto & left_range = all_join_state->getLeft(); const auto & right_range = all_join_state->getRight(); @@ -523,7 +533,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin(JoinKind kind) +MergeJoinAlgorithm::Status 
MergeJoinAlgorithm::allJoin() { PaddedPODArray idx_map[2]; @@ -671,8 +681,6 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState if (any_join_state.empty()) return {}; - auto kind = table_join->getTableJoin().kind(); - Chunk result; for (size_t source_num = 0; source_num < 2; ++source_num) @@ -717,7 +725,7 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState return {}; } -MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin(JoinKind kind) +MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin() { if (auto result = handleAnyJoinState()) return std::move(*result); @@ -804,8 +812,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num) IMergingAlgorithm::Status MergeJoinAlgorithm::merge() { - auto kind = table_join->getTableJoin().kind(); - if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted()) return Status(0); @@ -849,13 +855,11 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } } - auto strictness = table_join->getTableJoin().strictness(); - if (strictness == JoinStrictness::Any) - return anyJoin(kind); + return anyJoin(); if (strictness == JoinStrictness::All) - return allJoin(kind); + return allJoin(); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness); } @@ -874,9 +878,30 @@ MergeJoinTransform::MergeJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) +<<<<<<< HEAD , log(getLogger("MergeJoinTransform")) +======= +>>>>>>> b4a16f38320 (Add simple unit test for full sorting join) +{ +} + +MergeJoinTransform::MergeJoinTransform( + JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_) + : IMergingTransform( + input_headers, + output_header, + /* have_all_inputs_= */ true, + limit_hint_, + /* always_read_till_end_= */ false, + /* empty_chunk_on_finish_= */ true, + 
kind_, strictness_, on_clause_, input_headers, max_block_size) { - LOG_TRACE(log, "Use MergeJoinTransform"); } void MergeJoinTransform::onFinish() diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index cf9331abd59..0b0efa33722 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -19,6 +20,7 @@ #include #include #include +#include namespace Poco { class Logger; } @@ -227,7 +229,13 @@ private: class MergeJoinAlgorithm final : public IMergingAlgorithm { public: - explicit MergeJoinAlgorithm(JoinPtr table_join, const Blocks & input_headers, size_t max_block_size_); + MergeJoinAlgorithm(JoinKind kind_, + JoinStrictness strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + size_t max_block_size_); + + MergeJoinAlgorithm(JoinPtr join_ptr, const Blocks & input_headers, size_t max_block_size_); const char * getName() const override { return "MergeJoinAlgorithm"; } void initialize(Inputs inputs) override; @@ -238,10 +246,10 @@ public: private: std::optional handleAnyJoinState(); - Status anyJoin(JoinKind kind); + Status anyJoin(); std::optional handleAllJoinState(); - Status allJoin(JoinKind kind); + Status allJoin(); Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; @@ -251,11 +259,12 @@ private: std::array cursors; - /// Keep some state to make connection between data in different blocks + /// Keep some state to make handle data from different blocks AnyJoinState any_join_state; std::unique_ptr all_join_state; - JoinPtr table_join; + JoinKind kind; + JoinStrictness strictness; size_t max_block_size; int null_direction_hint = 1; @@ -285,12 +294,19 @@ public: size_t max_block_size, UInt64 limit_hint = 0); + MergeJoinTransform( + JoinKind kind_, + JoinStrictness 
strictness_, + const TableJoin::JoinOnClause & on_clause_, + const Blocks & input_headers, + const Block & output_header, + size_t max_block_size, + UInt64 limit_hint_ = 0); + String getName() const override { return "MergeJoinTransform"; } protected: void onFinish() override; - - LoggerPtr log; }; } diff --git a/src/Processors/tests/gtest_merge_join_algorithm.cpp b/src/Processors/tests/gtest_merge_join_algorithm.cpp new file mode 100644 index 00000000000..9a8b70efc17 --- /dev/null +++ b/src/Processors/tests/gtest_merge_join_algorithm.cpp @@ -0,0 +1,95 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + + +using namespace DB; + + +QueryPipeline buildJoinPipeline(std::shared_ptr left_source, std::shared_ptr right_source) +{ + Blocks inputs; + inputs.emplace_back(left_source->getPort().getHeader()); + inputs.emplace_back(right_source->getPort().getHeader()); + Block out_header = { + ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "t1.x"), + ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "t2.x"), + }; + + TableJoin::JoinOnClause on_clause; + on_clause.key_names_left = {"x"}; + on_clause.key_names_right = {"x"}; + auto joining = std::make_shared( + JoinKind::Inner, + JoinStrictness::All, + on_clause, + inputs, out_header, /* max_block_size = */ 0); + + chassert(joining->getInputs().size() == 2); + + connect(left_source->getPort(), joining->getInputs().front()); + connect(right_source->getPort(), joining->getInputs().back()); + + auto * output_port = &joining->getOutputPort(); + + auto processors = std::make_shared(); + processors->emplace_back(std::move(left_source)); + processors->emplace_back(std::move(right_source)); + processors->emplace_back(std::move(joining)); + + QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port); + return pipeline; +} + + +std::shared_ptr createSourceWithSingleValue(size_t rows_per_chunk, size_t total_chunks) +{ + 
Block header = { + ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "x") + }; + + Chunks chunks; + + for (size_t i = 0; i < total_chunks; ++i) + { + auto col = ColumnUInt64::create(rows_per_chunk, 1); + chunks.emplace_back(Columns{std::move(col)}, rows_per_chunk); + } + + return std::make_shared(std::move(header), std::move(chunks)); +} + +TEST(FullSortingJoin, Simple) +try +{ + auto left_source = createSourceWithSingleValue(3, 10); + auto right_source = createSourceWithSingleValue(2, 15); + + auto pipeline = buildJoinPipeline(left_source, right_source); + PullingPipelineExecutor executor(pipeline); + + Block block; + + size_t total_result_rows = 0; + while (executor.pull(block)) + { + total_result_rows += block.rows(); + } + ASSERT_EQ(total_result_rows, 3 * 10 * 2 * 15); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} From 2412f8521985c8d31322ed04baa502c4e7543ef6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 18 Sep 2023 16:14:02 +0000 Subject: [PATCH 02/57] wip full sorting asof join --- .../Transforms/MergeJoinTransform.cpp | 138 ++++++++- .../Transforms/MergeJoinTransform.h | 38 ++- .../tests/gtest_full_sorting_join.cpp | 287 ++++++++++++++++++ .../tests/gtest_merge_join_algorithm.cpp | 95 ------ 4 files changed, 446 insertions(+), 112 deletions(-) create mode 100644 src/Processors/tests/gtest_full_sorting_join.cpp delete mode 100644 src/Processors/tests/gtest_merge_join_algorithm.cpp diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index a9fd7978249..8370e548fcb 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -34,13 +34,15 @@ namespace ErrorCodes namespace { -FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) +constexpr UInt64 DEFAULT_VALUE_INDEX = std::numeric_limits::max(); + +FullMergeJoinCursorPtr createCursor(const Block & block, 
const Names & columns, JoinStrictness strictness) { SortDescription desc; desc.reserve(columns.size()); for (const auto & name : columns) desc.emplace_back(name); - return std::make_unique(materializeBlock(block), desc); + return std::make_unique(materializeBlock(block), desc, strictness == JoinStrictness::Asof); } template @@ -90,9 +92,10 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, const SortCursorImpl & rhs, size_t rpos, + size_t key_length, int null_direction_hint) { - for (size_t i = 0; i < lhs.sort_columns_size; ++i) + for (size_t i = 0; i < key_length; ++i) { /// TODO(@vdimir): use nullableCompareAt only if there's nullable columns int cmp = nullableCompareAt(*lhs.sort_columns[i], *rhs.sort_columns[i], lpos, rpos, null_direction_hint); @@ -104,13 +107,18 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, size_t lpos, int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImpl & rhs, int null_direction_hint) { - return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), null_direction_hint); + return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); +} + +int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs) +{ + return nullableCompareAt(lhs.getAsofColumn(), rhs.getAsofColumn(), lhs->getRow(), rhs->getRow()); } bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) { /// The last row of left cursor is less than the current row of the right cursor. 
- int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), null_direction_hint); + int cmp = compareCursors(lhs, lhs.rows - 1, rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); return cmp < 0; } @@ -222,11 +230,11 @@ Chunk getRowFromChunk(const Chunk & chunk, size_t pos) return result; } -void inline addRange(PaddedPODArray & left_map, size_t start, size_t end) +void inline addRange(PaddedPODArray & values, UInt64 start, UInt64 end) { assert(end > start); - for (size_t i = start; i < end; ++i) - left_map.push_back(i); + for (UInt64 i = start; i < end; ++i) + values.push_back(i); } void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size_t num) @@ -235,6 +243,11 @@ void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size left_or_right_map.push_back(idx); } +void inline addMany(PaddedPODArray & values, UInt64 value, size_t num) +{ + values.resize_fill(values.size() + num, value); +} + } const Chunk & FullMergeJoinCursor::getCurrent() const @@ -283,9 +296,15 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( if (input_headers.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); - if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) + if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All && strictness != JoinStrictness::Asof) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for strictness {}", strictness); + if (strictness == JoinStrictness::Asof) + { + if (kind != JoinKind::Left && kind != JoinKind::Inner) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not implement ASOF {} join", kind); + } + if (!isInner(kind) && !isLeft(kind) && !isRight(kind) && !isFull(kind)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm is not implemented for kind {}", kind); @@ -293,8 +312,8 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( throw Exception(ErrorCodes::NOT_IMPLEMENTED, 
"MergeJoinAlgorithm does not support ON filter conditions"); cursors = { - createCursor(input_headers[0], on_clause_.key_names_left), - createCursor(input_headers[1], on_clause_.key_names_right) + createCursor(input_headers[0], on_clause_.key_names_left, strictness), + createCursor(input_headers[1], on_clause_.key_names_right, strictness), }; MergeJoinAlgorithm::MergeJoinAlgorithm( @@ -313,6 +332,8 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( size_t left_idx = input_headers[0].getPositionByName(left_key); size_t right_idx = input_headers[1].getPositionByName(right_key); left_to_right_key_remap[left_idx] = right_idx; + if (strictness == JoinStrictness::Asof) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ASOF joins USING"); } const auto *smjPtr = typeid_cast(table_join.get()); @@ -321,6 +342,19 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( null_direction_hint = smjPtr->getNullDirection(); } + if (strictness == JoinStrictness::Asof) + setAsofInequality(join_ptr->getTableJoin().getAsofInequality()); +} + +void MergeJoinAlgorithm::setAsofInequality(ASOFJoinInequality asof_inequality_) +{ + if (strictness != JoinStrictness::Asof) + throw Exception(ErrorCodes::LOGICAL_ERROR, "setAsofInequality is only supported for ASOF joins"); + + if (asof_inequality_ == ASOFJoinInequality::None) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ASOF inequality cannot be None"); + + asof_inequality = asof_inequality_; } void MergeJoinAlgorithm::logElapsed(double seconds) @@ -770,6 +804,81 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::anyJoin() return Status(std::move(result)); } + +MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() +{ + auto & left_cursor = *cursors[0]; + if (!left_cursor->isValid()) + return Status(0); + + auto & right_cursor = *cursors[1]; + if (!right_cursor->isValid()) + return Status(1); + + PaddedPODArray left_map; + PaddedPODArray right_map; + + while (left_cursor->isValid() && right_cursor->isValid()) + { + auto lpos = 
left_cursor->getRow(); + auto rpos = right_cursor->getRow(); + auto cmp = compareCursors(*left_cursor, *right_cursor); + if (cmp == 0) + { + auto asof_cmp = compareAsofCursors(left_cursor, right_cursor); + if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) + { + /// First row in right table that is greater (or equal) than current row in left table + /// matches asof join condition the best + left_map.push_back(lpos); + right_map.push_back(rpos); + left_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Less || asof_inequality == ASOFJoinInequality::LessOrEquals) + { + /// Asof condition is not (yet) satisfied, skip row in right table + right_cursor->next(); + continue; + } + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join"); + } + else if (cmp < 0) + { + /// no matches for rows in left table, just pass them through + size_t num = nextDistinct(*left_cursor); + if (isLeft(kind)) + { + /// return them with default values at right side + addRange(left_map, lpos, lpos + num); + addMany(right_map, DEFAULT_VALUE_INDEX, num); + } + } + else + { + /// skip rows in right table until we find match for current row in left table + nextDistinct(*right_cursor); + } + } + + chassert(left_map.size() == right_map.size()); + Chunk result; + { + Columns lcols = indexColumns(left_cursor.getCurrent().getColumns(), left_map); + for (auto & col : lcols) + result.addColumn(std::move(col)); + + Columns rcols = indexColumns(right_cursor.getCurrent().getColumns(), right_map); + for (auto & col : rcols) + result.addColumn(std::move(col)); + } + UNUSED(asof_inequality); + return Status(std::move(result)); +} + + /// if `source_num == 0` get data from left cursor and fill defaults at right /// otherwise - vice versa Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const @@ -861,6 +970,9 @@ 
IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (strictness == JoinStrictness::All) return allJoin(); + if (strictness == JoinStrictness::Asof) + return asofJoin(); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported strictness '{}'", strictness); } @@ -878,10 +990,6 @@ MergeJoinTransform::MergeJoinTransform( /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) -<<<<<<< HEAD - , log(getLogger("MergeJoinTransform")) -======= ->>>>>>> b4a16f38320 (Add simple unit test for full sorting join) { } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 0b0efa33722..3ee01e57992 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -195,10 +195,27 @@ private: class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) + FullMergeJoinCursor( + const Block & sample_block_, + const SortDescription & description_, + bool is_asof = false) : sample_block(sample_block_.cloneEmpty()) , desc(description_) { + if (desc.size() == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); + + if (is_asof) + { + /// For ASOF join prefix of sort description is used for equality comparison + /// and the last column is used for inequality comparison and is handled separately + + auto asof_column_description = desc.back(); + desc.pop_back(); + + chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1); + asof_column_position = sample_block.getPositionByName(asof_column_description.column_name); + } } bool fullyCompleted() const; @@ -209,17 +226,27 @@ public: SortCursorImpl * operator-> () { return &cursor; } const SortCursorImpl * operator-> () const { return &cursor; } + SortCursorImpl & operator* () { return 
cursor; } + const SortCursorImpl & operator* () const { return cursor; } + SortCursorImpl cursor; const Block & sampleBlock() const { return sample_block; } Columns sampleColumns() const { return sample_block.getColumns(); } + const IColumn & getAsofColumn() const + { + return *cursor.all_columns[asof_column_position]; + } + private: Block sample_block; SortDescription desc; Chunk current_chunk; bool recieved_all_blocks = false; + + size_t asof_column_position; }; /* @@ -242,8 +269,9 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; - void logElapsed(double seconds); + void setAsofInequality(ASOFJoinInequality asof_inequality_); + void logElapsed(double seconds); private: std::optional handleAnyJoinState(); Status anyJoin(); @@ -251,13 +279,17 @@ private: std::optional handleAllJoinState(); Status allJoin(); + Status asofJoin(); + Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; + /// For `USING` join key columns should have values from right side instead of defaults std::unordered_map left_to_right_key_remap; std::array cursors; + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None; /// Keep some state to make handle data from different blocks AnyJoinState any_join_state; @@ -305,6 +337,8 @@ public: String getName() const override { return "MergeJoinTransform"; } + void setAsofInequality(ASOFJoinInequality asof_inequality_) { algorithm.setAsofInequality(asof_inequality_); } + protected: void onFinish() override; }; diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp new file mode 100644 index 00000000000..888e280b55f --- /dev/null +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -0,0 +1,287 @@ +#include + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + + +using 
namespace DB; + +UInt64 getAndPrintRandomSeed() +{ + UInt64 seed = randomSeed(); + std::cerr << "TEST_RANDOM_SEED: " << seed << std::endl; + return seed; +} + +static UInt64 TEST_RANDOM_SEED = getAndPrintRandomSeed(); + +static pcg64 rng(TEST_RANDOM_SEED); + + +QueryPipeline buildJoinPipeline( + std::shared_ptr left_source, + std::shared_ptr right_source, + size_t key_length = 1, + JoinKind kind = JoinKind::Inner, + JoinStrictness strictness = JoinStrictness::All, + ASOFJoinInequality asof_inequality = ASOFJoinInequality::None) +{ + Blocks inputs; + inputs.emplace_back(left_source->getPort().getHeader()); + inputs.emplace_back(right_source->getPort().getHeader()); + + Block out_header; + for (const auto & input : inputs) + { + for (ColumnWithTypeAndName column : input) + { + if (&input == &inputs.front()) + column.name = "t1." + column.name; + else + column.name = "t2." + column.name; + out_header.insert(column); + } + } + + TableJoin::JoinOnClause on_clause; + for (size_t i = 0; i < key_length; ++i) + { + on_clause.key_names_left.emplace_back(inputs[0].getByPosition(i).name); + on_clause.key_names_right.emplace_back(inputs[1].getByPosition(i).name); + } + + auto joining = std::make_shared( + kind, + strictness, + on_clause, + inputs, out_header, /* max_block_size = */ 0); + + if (asof_inequality != ASOFJoinInequality::None) + joining->setAsofInequality(asof_inequality); + + chassert(joining->getInputs().size() == 2); + + connect(left_source->getPort(), joining->getInputs().front()); + connect(right_source->getPort(), joining->getInputs().back()); + + auto * output_port = &joining->getOutputPort(); + + auto processors = std::make_shared(); + processors->emplace_back(std::move(left_source)); + processors->emplace_back(std::move(right_source)); + processors->emplace_back(std::move(joining)); + + QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port); + return pipeline; +} + + +std::shared_ptr oneColumnSource(const std::vector> & values) +{ + Block 
header = { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "x") }; + Chunks chunks; + for (const auto & chunk_values : values) + { + auto column = ColumnUInt64::create(); + for (auto n : chunk_values) + column->insertValue(n); + chunks.emplace_back(Chunk(Columns{std::move(column)}, chunk_values.size())); + } + return std::make_shared(header, std::move(chunks)); +} + + +TEST(FullSortingJoin, Simple) +try +{ + auto left_source = oneColumnSource({ {1, 2, 3, 4, 5} }); + auto right_source = oneColumnSource({ {1}, {2}, {3}, {4}, {5} }); + + auto pipeline = buildJoinPipeline(left_source, right_source); + PullingPipelineExecutor executor(pipeline); + + Block block; + + size_t total_result_rows = 0; + while (executor.pull(block)) + total_result_rows += block.rows(); + + ASSERT_EQ(total_result_rows, 5); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +std::shared_ptr sourceFromRows( + const Block & header, const std::vector> & values, double break_prob = 0.0) +{ + Chunks chunks; + auto columns = header.cloneEmptyColumns(); + + std::uniform_real_distribution<> prob_dis(0.0, 1.0); + + + for (auto row : values) + { + if (!columns.empty() && (row.empty() || prob_dis(rng) < break_prob)) + { + size_t rows = columns.front()->size(); + chunks.emplace_back(std::move(columns), rows); + columns = header.cloneEmptyColumns(); + continue; + } + + for (size_t i = 0; i < columns.size(); ++i) + columns[i]->insert(row[i]); + } + + if (!columns.empty()) + chunks.emplace_back(std::move(columns), columns.front()->size()); + + return std::make_shared(header, std::move(chunks)); +} + + +std::vector> getValuesFromBlock(const Block & block, const Names & names) +{ + std::vector> result; + for (size_t i = 0; i < block.rows(); ++i) + { + auto & row = result.emplace_back(); + for (const auto & name : names) + block.getByName(name).column->get(i, row.emplace_back()); + } + return result; +} + + +Block executePipeline(QueryPipeline & 
pipeline) +{ + PullingPipelineExecutor executor(pipeline); + + Blocks result_blocks; + while (true) + { + Block block; + bool is_ok = executor.pull(block); + if (!is_ok) + break; + result_blocks.emplace_back(std::move(block)); + } + + return concatenateBlocks(result_blocks); +} + +TEST(FullSortingJoin, Asof) +try +{ + const std::vector chunk_break = {}; + + auto left_source = sourceFromRows({ + ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "key"), + ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "t"), + }, { + {"AMZN", 3}, + {"AMZN", 4}, + {"AMZN", 6}, + }); + + auto right_source = sourceFromRows({ + ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "key"), + ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "t"), + ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "value"), + }, { + {"AAPL", 1, 97}, + chunk_break, + {"AAPL", 2, 98}, + {"AAPL", 3, 99}, + {"AMZN", 1, 100}, + {"AMZN", 2, 110}, + chunk_break, + {"AMZN", 4, 130}, + {"AMZN", 5, 140}, + }); + + auto pipeline = buildJoinPipeline( + left_source, right_source, /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); + + Block result_block = executePipeline(pipeline); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + ASSERT_EQ(values.size(), 2); + ASSERT_EQ(values[0], (std::vector{"AMZN", 3u, 4u, 130u})); + ASSERT_EQ(values[1], (std::vector{"AMZN", 4u, 4u, 130u})); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST(FullSortingJoin, AsofOnlyColumn) +try +{ + const std::vector chunk_break = {}; + + auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); + + UInt64 p = std::uniform_int_distribution<>(0, 2)(rng); + double break_prob = p == 0 ? 0.0 : (p == 1 ? 
0.5 : 1.0); + + auto right_source = sourceFromRows({ + ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "t"), + ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "value"), + }, { + {1, 101}, + {2, 102}, + {4, 104}, + {5, 105}, + {11, 111}, + {15, 115}, + }, + break_prob); + + auto pipeline = buildJoinPipeline( + left_source, right_source, /* key_length = */ 1, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); + + Block result_block = executePipeline(pipeline); + + ASSERT_EQ( + assert_cast(result_block.getByName("t1.x").column.get())->getData(), + (ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.t").column.get())->getData(), + (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 15}) + ); + + ASSERT_EQ( + assert_cast(result_block.getByName("t2.value").column.get())->getData(), + (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 115}) + ); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} diff --git a/src/Processors/tests/gtest_merge_join_algorithm.cpp b/src/Processors/tests/gtest_merge_join_algorithm.cpp deleted file mode 100644 index 9a8b70efc17..00000000000 --- a/src/Processors/tests/gtest_merge_join_algorithm.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - - -using namespace DB; - - -QueryPipeline buildJoinPipeline(std::shared_ptr left_source, std::shared_ptr right_source) -{ - Blocks inputs; - inputs.emplace_back(left_source->getPort().getHeader()); - inputs.emplace_back(right_source->getPort().getHeader()); - Block out_header = { - ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "t1.x"), - ColumnWithTypeAndName(ColumnUInt8::create(), std::make_shared(), "t2.x"), - }; - - TableJoin::JoinOnClause on_clause; - on_clause.key_names_left = {"x"}; 
- on_clause.key_names_right = {"x"}; - auto joining = std::make_shared( - JoinKind::Inner, - JoinStrictness::All, - on_clause, - inputs, out_header, /* max_block_size = */ 0); - - chassert(joining->getInputs().size() == 2); - - connect(left_source->getPort(), joining->getInputs().front()); - connect(right_source->getPort(), joining->getInputs().back()); - - auto * output_port = &joining->getOutputPort(); - - auto processors = std::make_shared(); - processors->emplace_back(std::move(left_source)); - processors->emplace_back(std::move(right_source)); - processors->emplace_back(std::move(joining)); - - QueryPipeline pipeline(QueryPlanResourceHolder{}, processors, output_port); - return pipeline; -} - - -std::shared_ptr createSourceWithSingleValue(size_t rows_per_chunk, size_t total_chunks) -{ - Block header = { - ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "x") - }; - - Chunks chunks; - - for (size_t i = 0; i < total_chunks; ++i) - { - auto col = ColumnUInt64::create(rows_per_chunk, 1); - chunks.emplace_back(Columns{std::move(col)}, rows_per_chunk); - } - - return std::make_shared(std::move(header), std::move(chunks)); -} - -TEST(FullSortingJoin, Simple) -try -{ - auto left_source = createSourceWithSingleValue(3, 10); - auto right_source = createSourceWithSingleValue(2, 15); - - auto pipeline = buildJoinPipeline(left_source, right_source); - PullingPipelineExecutor executor(pipeline); - - Block block; - - size_t total_result_rows = 0; - while (executor.pull(block)) - { - total_result_rows += block.rows(); - } - ASSERT_EQ(total_result_rows, 3 * 10 * 2 * 15); -} -catch (Exception & e) -{ - std::cout << e.getStackTraceString() << std::endl; - throw; -} From 4079f25a3865f23f23cec9c43aefe241b58e7972 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 20 Sep 2023 12:33:29 +0000 Subject: [PATCH 03/57] fix test --- .../tests/gtest_full_sorting_join.cpp | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git 
a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 888e280b55f..b3b462ee1e3 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -134,26 +134,37 @@ std::shared_ptr sourceFromRows( { Chunks chunks; auto columns = header.cloneEmptyColumns(); + chassert(!columns.empty()); std::uniform_real_distribution<> prob_dis(0.0, 1.0); - - for (auto row : values) + for (const auto & row : values) { - if (!columns.empty() && (row.empty() || prob_dis(rng) < break_prob)) + if (!columns.front()->empty() && (row.empty() || prob_dis(rng) < break_prob)) { size_t rows = columns.front()->size(); chunks.emplace_back(std::move(columns), rows); columns = header.cloneEmptyColumns(); - continue; + if (row.empty()) + continue; } + chassert(row.size() == columns.size()); for (size_t i = 0; i < columns.size(); ++i) columns[i]->insert(row[i]); } - if (!columns.empty()) - chunks.emplace_back(std::move(columns), columns.front()->size()); + if (!columns.front()->empty()) + { + size_t rows = columns.front()->size(); + chunks.emplace_back(std::move(columns), rows); + } + + /// Check that code above is correct. 
+ size_t total_result_rows = 0; + for (const auto & chunk : chunks) + total_result_rows += chunk.getNumRows(); + chassert(total_result_rows == values.size()); return std::make_shared(header, std::move(chunks)); } @@ -225,9 +236,11 @@ try Block result_block = executePipeline(pipeline); auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); - ASSERT_EQ(values.size(), 2); - ASSERT_EQ(values[0], (std::vector{"AMZN", 3u, 4u, 130u})); - ASSERT_EQ(values[1], (std::vector{"AMZN", 4u, 4u, 130u})); + + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 4u, 130u}, + {"AMZN", 4u, 4u, 130u}, + })); } catch (Exception & e) { @@ -272,12 +285,12 @@ try ASSERT_EQ( assert_cast(result_block.getByName("t2.t").column.get())->getData(), - (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 15}) + (ColumnUInt64::Container{4, 4, 4, 4, 4, 5, 5, 11, 11, 11, 11}) ); ASSERT_EQ( assert_cast(result_block.getByName("t2.value").column.get())->getData(), - (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 115}) + (ColumnUInt64::Container{104, 104, 104, 104, 104, 105, 105, 111, 111, 111, 111}) ); } catch (Exception & e) From 984d94e5f10e7e1c3cc953e1f5394c153e8fdedc Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 20 Sep 2023 15:46:19 +0000 Subject: [PATCH 04/57] upd gtest_full_sorting_join --- .../tests/gtest_full_sorting_join.cpp | 154 +++++++++--------- 1 file changed, 76 insertions(+), 78 deletions(-) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index b3b462ee1e3..4d7ce25a7e8 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -93,7 +93,7 @@ QueryPipeline buildJoinPipeline( std::shared_ptr oneColumnSource(const std::vector> & values) { - Block header = { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "x") }; + Block header = { ColumnWithTypeAndName(std::make_shared(), "x") }; Chunks 
chunks; for (const auto & chunk_values : values) { @@ -129,45 +129,46 @@ catch (Exception & e) throw; } -std::shared_ptr sourceFromRows( - const Block & header, const std::vector> & values, double break_prob = 0.0) +class SourceChunksBuilder { +public: + explicit SourceChunksBuilder(const Block & header_) + : header(header_) + { + current_chunk = header.cloneEmptyColumns(); + chassert(!current_chunk.empty()); + } + + SourceChunksBuilder & addRow(const std::vector & row) + { + chassert(row.size() == current_chunk.size()); + for (size_t i = 0; i < current_chunk.size(); ++i) + current_chunk[i]->insert(row[i]); + return *this; + } + + SourceChunksBuilder & addChunk() + { + if (current_chunk.front()->empty()) + return *this; + + size_t rows = current_chunk.front()->size(); + chunks.emplace_back(std::move(current_chunk), rows); + current_chunk = header.cloneEmptyColumns(); + return *this; + } + + std::shared_ptr build() + { + addChunk(); + return std::make_shared(header, std::move(chunks)); + } + +private: + Block header; Chunks chunks; - auto columns = header.cloneEmptyColumns(); - chassert(!columns.empty()); - - std::uniform_real_distribution<> prob_dis(0.0, 1.0); - - for (const auto & row : values) - { - if (!columns.front()->empty() && (row.empty() || prob_dis(rng) < break_prob)) - { - size_t rows = columns.front()->size(); - chunks.emplace_back(std::move(columns), rows); - columns = header.cloneEmptyColumns(); - if (row.empty()) - continue; - } - - chassert(row.size() == columns.size()); - for (size_t i = 0; i < columns.size(); ++i) - columns[i]->insert(row[i]); - } - - if (!columns.front()->empty()) - { - size_t rows = columns.front()->size(); - chunks.emplace_back(std::move(columns), rows); - } - - /// Check that code above is correct. 
- size_t total_result_rows = 0; - for (const auto & chunk : chunks) - total_result_rows += chunk.getNumRows(); - chassert(total_result_rows == values.size()); - - return std::make_shared(header, std::move(chunks)); -} + MutableColumns current_chunk; +}; std::vector> getValuesFromBlock(const Block & block, const Names & names) @@ -203,32 +204,30 @@ Block executePipeline(QueryPipeline & pipeline) TEST(FullSortingJoin, Asof) try { - const std::vector chunk_break = {}; + auto left_source = SourceChunksBuilder({ + {std::make_shared(), "key"}, + {std::make_shared(), "t"}, + }) + .addRow({"AMZN", 3}) + .addRow({"AMZN", 4}) + .addRow({"AMZN", 6}) + .build(); - auto left_source = sourceFromRows({ - ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "key"), - ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "t"), - }, { - {"AMZN", 3}, - {"AMZN", 4}, - {"AMZN", 6}, - }); - - auto right_source = sourceFromRows({ - ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "key"), - ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "t"), - ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "value"), - }, { - {"AAPL", 1, 97}, - chunk_break, - {"AAPL", 2, 98}, - {"AAPL", 3, 99}, - {"AMZN", 1, 100}, - {"AMZN", 2, 110}, - chunk_break, - {"AMZN", 4, 130}, - {"AMZN", 5, 140}, - }); + auto right_source = SourceChunksBuilder({ + {std::make_shared(), "key"}, + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }) + .addRow({"AAPL", 1, 97}) + .addChunk() + .addRow({"AAPL", 2, 98}) + .addRow({"AAPL", 3, 99}) + .addRow({"AMZN", 1, 100}) + .addRow({"AMZN", 2, 110}) + .addChunk() + .addRow({"AMZN", 4, 130}) + .addRow({"AMZN", 5, 140}) + .build(); auto pipeline = buildJoinPipeline( left_source, right_source, /* key_length = */ 2, @@ -252,25 +251,24 @@ catch (Exception & e) TEST(FullSortingJoin, AsofOnlyColumn) try { - const std::vector chunk_break = {}; - auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 
5, 5, 6}, {9, 9}, {10, 20} }); UInt64 p = std::uniform_int_distribution<>(0, 2)(rng); - double break_prob = p == 0 ? 0.0 : (p == 1 ? 0.5 : 1.0); - auto right_source = sourceFromRows({ - ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "t"), - ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "value"), - }, { - {1, 101}, - {2, 102}, - {4, 104}, - {5, 105}, - {11, 111}, - {15, 115}, - }, - break_prob); + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "t"}, + {std::make_shared(), "value"}, + }); + + double break_prob = p == 0 ? 0.0 : (p == 1 ? 0.5 : 1.0); + std::uniform_real_distribution<> prob_dis(0.0, 1.0); + for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) + { + right_source_builder.addRow(row); + if (prob_dis(rng) < break_prob) + right_source_builder.addChunk(); + } + auto right_source = right_source_builder.build(); auto pipeline = buildJoinPipeline( left_source, right_source, /* key_length = */ 1, From 6330b466aa326e40eb7abca2699554c98241b342 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 21 Sep 2023 11:19:20 +0000 Subject: [PATCH 05/57] Add randomized test FullSortingJoin.AsofGeneratedTestData --- src/Common/getRandomASCIIString.cpp | 7 +- src/Common/getRandomASCIIString.h | 3 + .../Transforms/MergeJoinTransform.cpp | 2 +- .../tests/gtest_full_sorting_join.cpp | 121 +++++++++++++++--- 4 files changed, 116 insertions(+), 17 deletions(-) diff --git a/src/Common/getRandomASCIIString.cpp b/src/Common/getRandomASCIIString.cpp index 594b4cd3228..a295277b453 100644 --- a/src/Common/getRandomASCIIString.cpp +++ b/src/Common/getRandomASCIIString.cpp @@ -6,12 +6,17 @@ namespace DB { String getRandomASCIIString(size_t length) +{ + return getRandomASCIIString(length, thread_local_rng); +} + +String getRandomASCIIString(size_t length, pcg64 & rng) { std::uniform_int_distribution distribution('a', 'z'); String res; res.resize(length); for (auto & c : res) - c = 
distribution(thread_local_rng); + c = distribution(rng); return res; } diff --git a/src/Common/getRandomASCIIString.h b/src/Common/getRandomASCIIString.h index 627d2700ce3..19e1ff7120e 100644 --- a/src/Common/getRandomASCIIString.h +++ b/src/Common/getRandomASCIIString.h @@ -2,11 +2,14 @@ #include +#include + namespace DB { /// Slow random string. Useful for random names and things like this. Not for generating data. String getRandomASCIIString(size_t length); +String getRandomASCIIString(size_t length, pcg64 & rng); } diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 8370e548fcb..bfde5892289 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -874,7 +874,7 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() for (auto & col : rcols) result.addColumn(std::move(col)); } - UNUSED(asof_inequality); + return Status(std::move(result)); } diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 4d7ce25a7e8..741e945bfdb 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -1,30 +1,31 @@ #include - -#include #include #include -#include -#include -#include -#include -#include -#include - #include +#include +#include + #include + #include +#include +#include +#include +#include +#include #include +#include using namespace DB; UInt64 getAndPrintRandomSeed() { UInt64 seed = randomSeed(); - std::cerr << "TEST_RANDOM_SEED: " << seed << std::endl; + std::cerr << __FILE__ << "::" << "TEST_RANDOM_SEED = " << seed << "ull" << std::endl; return seed; } @@ -132,6 +133,8 @@ catch (Exception & e) class SourceChunksBuilder { public: + double break_prob = 0.0; + explicit SourceChunksBuilder(const Block & header_) : header(header_) { @@ -144,6 +147,10 @@ public: chassert(row.size() == current_chunk.size()); for (size_t i 
= 0; i < current_chunk.size(); ++i) current_chunk[i]->insert(row[i]); + + if (break_prob > 0.0 && std::uniform_real_distribution<>(0.0, 1.0)(rng) < break_prob) + addChunk(); + return *this; } @@ -184,7 +191,7 @@ std::vector> getValuesFromBlock(const Block & block, const Na } -Block executePipeline(QueryPipeline & pipeline) +Block executePipeline(QueryPipeline && pipeline) { PullingPipelineExecutor executor(pipeline); @@ -233,7 +240,7 @@ try left_source, right_source, /* key_length = */ 2, JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); - Block result_block = executePipeline(pipeline); + Block result_block = executePipeline(std::move(pipeline)); auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); ASSERT_EQ(values, (std::vector>{ @@ -253,13 +260,12 @@ try { auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); - UInt64 p = std::uniform_int_distribution<>(0, 2)(rng); - SourceChunksBuilder right_source_builder({ {std::make_shared(), "t"}, {std::make_shared(), "value"}, }); + UInt64 p = std::uniform_int_distribution<>(0, 2)(rng); double break_prob = p == 0 ? 0.0 : (p == 1 ? 
0.5 : 1.0); std::uniform_real_distribution<> prob_dis(0.0, 1.0); for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) @@ -274,7 +280,7 @@ try left_source, right_source, /* key_length = */ 1, JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); - Block result_block = executePipeline(pipeline); + Block result_block = executePipeline(std::move(pipeline)); ASSERT_EQ( assert_cast(result_block.getByName("t1.x").column.get())->getData(), @@ -296,3 +302,88 @@ catch (Exception & e) std::cout << e.getStackTraceString() << std::endl; throw; } + +TEST(FullSortingJoin, AsofGeneratedTestData) +try +{ + auto left_source_builder = SourceChunksBuilder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + auto right_source_builder = SourceChunksBuilder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + /// uniform_int_distribution to have 0.0 and 1.0 probabilities + left_source_builder.break_prob = std::uniform_int_distribution<>(0, 5)(rng) / 5.0; + right_source_builder.break_prob = std::uniform_int_distribution<>(0, 5)(rng) / 5.0; + + auto get_next_key = [](UInt64 & k1, String & k2) + { + size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); + String new_k2 = getRandomASCIIString(str_len, rng); + if (new_k2.compare(k2) <= 0) + ++k1; + k2 = new_k2; + }; + + ColumnUInt64::Container expected; + + UInt64 k1 = 0; + String k2 = "asdfg"; + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + UInt64 left_t = 0; + size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + + left_source_builder.addRow({k1, k2, left_t, 10 * left_t}); + expected.push_back(10 * 
left_t); + + auto num_matches = 1 + std::poisson_distribution<>(4)(rng); + + size_t right_t = left_t; + for (size_t j = 0; j < num_matches; ++j) + { + right_t += std::uniform_int_distribution<>(0, 3)(rng); + right_source_builder.addRow({k1, k2, right_t, j == 0 ? 100 * left_t : 0}); + } + /// next left_t should be greater than right_t not to match with previous rows + left_t = right_t; + } + + /// generate some rows with greater left_t to check that they are not matched + num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, 10 * left_t}); + } + + get_next_key(k1, k2); + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.build(), right_source_builder.build(), + /* key_length = */ 3, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); + + ASSERT_EQ(assert_cast(block.getByName("t1.attr").column.get())->getData(), expected); + + for (auto & e : expected) + e = 10 * e; + ASSERT_EQ(assert_cast(block.getByName("t2.attr").column.get())->getData(), expected); +} +catch (Exception & e) { + std::cout << e.getStackTraceString() << std::endl; + throw; +} From da4f35556100847d527f194cee4b2f99b50bfa58 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 21 Sep 2023 11:38:37 +0000 Subject: [PATCH 06/57] upd FullSortingJoin.AsofGeneratedTestData --- .../tests/gtest_full_sorting_join.cpp | 56 +++++++++++++++---- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 741e945bfdb..49c9f58b1d5 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -208,6 +208,14 @@ Block executePipeline(QueryPipeline && pipeline) return concatenateBlocks(result_blocks); } +template +void checkColumn(const typename 
ColumnVector::Container & expected, const Block & block, const std::string & name) +{ + const auto & actual = assert_cast *>(block.getByName(name).column.get())->getData(); + EXPECT_EQ(actual.size(), expected.size()); + ASSERT_EQ(actual, expected); +} + TEST(FullSortingJoin, Asof) try { @@ -306,18 +314,27 @@ catch (Exception & e) TEST(FullSortingJoin, AsofGeneratedTestData) try { + std::vector join_kinds = {JoinKind::Inner, JoinKind::Left}; + auto join_kind = join_kinds[std::uniform_int_distribution(0, join_kinds.size() - 1)(rng)]; + + std::vector asof_inequalities = { + ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals, + // ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals, + }; + auto asof_inequality = asof_inequalities[std::uniform_int_distribution(0, asof_inequalities.size() - 1)(rng)]; + auto left_source_builder = SourceChunksBuilder({ {std::make_shared(), "k1"}, {std::make_shared(), "k2"}, {std::make_shared(), "t"}, - {std::make_shared(), "attr"}, + {std::make_shared(), "attr"}, }); auto right_source_builder = SourceChunksBuilder({ {std::make_shared(), "k1"}, {std::make_shared(), "k2"}, {std::make_shared(), "t"}, - {std::make_shared(), "attr"}, + {std::make_shared(), "attr"}, }); /// uniform_int_distribution to have 0.0 and 1.0 probabilities @@ -333,14 +350,14 @@ try k2 = new_k2; }; - ColumnUInt64::Container expected; + ColumnInt64::Container expected; UInt64 k1 = 0; String k2 = "asdfg"; auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { - UInt64 left_t = 0; + Int64 left_t = 0; size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); for (size_t i = 0; i < num_left_rows; ++i) { @@ -351,11 +368,22 @@ try auto num_matches = 1 + std::poisson_distribution<>(4)(rng); - size_t right_t = left_t; + auto right_t = left_t; for (size_t j = 0; j < num_matches; ++j) { - right_t += std::uniform_int_distribution<>(0, 3)(rng); - 
right_source_builder.addRow({k1, k2, right_t, j == 0 ? 100 * left_t : 0}); + int min_step = 1; + if (asof_inequality == ASOFJoinInequality::LessOrEquals || asof_inequality == ASOFJoinInequality::GreaterOrEquals) + min_step = 0; + right_t += std::uniform_int_distribution<>(min_step, 3)(rng); + + bool is_match = false; + + if (asof_inequality == ASOFJoinInequality::LessOrEquals || asof_inequality == ASOFJoinInequality::Less) + is_match = j == 0; + else if (asof_inequality == ASOFJoinInequality::GreaterOrEquals || asof_inequality == ASOFJoinInequality::Greater) + is_match = j == num_matches - 1; + + right_source_builder.addRow({k1, k2, right_t, is_match ? 100 * left_t : -1}); } /// next left_t should be greater than right_t not to match with previous rows left_t = right_t; @@ -366,7 +394,10 @@ try for (size_t i = 0; i < num_left_rows; ++i) { left_t += std::uniform_int_distribution<>(1, 10)(rng); - left_source_builder.addRow({k1, k2, left_t, 10 * left_t}); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); } get_next_key(k1, k2); @@ -375,13 +406,14 @@ try Block result_block = executePipeline(buildJoinPipeline( left_source_builder.build(), right_source_builder.build(), /* key_length = */ 3, - JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); + join_kind, JoinStrictness::Asof, asof_inequality)); - ASSERT_EQ(assert_cast(block.getByName("t1.attr").column.get())->getData(), expected); + checkColumn(expected, result_block, "t1.attr"); for (auto & e : expected) - e = 10 * e; - ASSERT_EQ(assert_cast(block.getByName("t2.attr").column.get())->getData(), expected); + e = e < 0 ? 
0 : 10 * e; /// non matched rows from left table have negative attr + + checkColumn(expected, result_block, "t2.attr"); } catch (Exception & e) { std::cout << e.getStackTraceString() << std::endl; From 97e3ee6e661620187bf115d948115936e36cdaf4 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 21 Sep 2023 11:40:41 +0000 Subject: [PATCH 07/57] upd gtest_full_sorting_join --- .../tests/gtest_full_sorting_join.cpp | 92 +++++++++++-------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 49c9f58b1d5..bbd321a78d7 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -142,7 +142,7 @@ public: chassert(!current_chunk.empty()); } - SourceChunksBuilder & addRow(const std::vector & row) + void addRow(const std::vector & row) { chassert(row.size() == current_chunk.size()); for (size_t i = 0; i < current_chunk.size(); ++i) @@ -150,25 +150,29 @@ public: if (break_prob > 0.0 && std::uniform_real_distribution<>(0.0, 1.0)(rng) < break_prob) addChunk(); - - return *this; } - SourceChunksBuilder & addChunk() + void addChunk() { if (current_chunk.front()->empty()) - return *this; + return; size_t rows = current_chunk.front()->size(); chunks.emplace_back(std::move(current_chunk), rows); current_chunk = header.cloneEmptyColumns(); - return *this; + return; } std::shared_ptr build() { addChunk(); - return std::make_shared(header, std::move(chunks)); + + /// copy chunk to allow reusing same builder + Chunks chunks_copy; + chunks_copy.reserve(chunks.size()); + for (const auto & chunk : chunks) + chunks_copy.emplace_back(chunk.clone()); + return std::make_shared(header, std::move(chunks_copy)); } private: @@ -219,42 +223,54 @@ void checkColumn(const typename ColumnVector::Container & expected, const Blo TEST(FullSortingJoin, Asof) try { - auto left_source = SourceChunksBuilder({ + SourceChunksBuilder 
left_source({ {std::make_shared(), "key"}, {std::make_shared(), "t"}, - }) - .addRow({"AMZN", 3}) - .addRow({"AMZN", 4}) - .addRow({"AMZN", 6}) - .build(); + }); - auto right_source = SourceChunksBuilder({ + left_source.addRow({"AMZN", 3}); + left_source.addRow({"AMZN", 4}); + left_source.addRow({"AMZN", 6}); + + SourceChunksBuilder right_source({ {std::make_shared(), "key"}, {std::make_shared(), "t"}, {std::make_shared(), "value"}, - }) - .addRow({"AAPL", 1, 97}) - .addChunk() - .addRow({"AAPL", 2, 98}) - .addRow({"AAPL", 3, 99}) - .addRow({"AMZN", 1, 100}) - .addRow({"AMZN", 2, 110}) - .addChunk() - .addRow({"AMZN", 4, 130}) - .addRow({"AMZN", 5, 140}) - .build(); + }); + right_source.addRow({"AAPL", 1, 97}); + right_source.addChunk(); + right_source.addRow({"AAPL", 2, 98}); + right_source.addRow({"AAPL", 3, 99}); + right_source.addRow({"AMZN", 1, 100}); + right_source.addRow({"AMZN", 2, 110}); + right_source.addChunk(); + right_source.addRow({"AMZN", 4, 130}); + right_source.addRow({"AMZN", 5, 140}); - auto pipeline = buildJoinPipeline( - left_source, right_source, /* key_length = */ 2, - JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals); + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.build(), right_source.build(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); - Block result_block = executePipeline(std::move(pipeline)); - auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 4u, 130u}, + {"AMZN", 4u, 4u, 130u}, + })); + } - ASSERT_EQ(values, (std::vector>{ - {"AMZN", 3u, 4u, 130u}, - {"AMZN", 4u, 4u, 130u}, - })); + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.build(), right_source.build(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, 
ASOFJoinInequality::GreaterOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 2u, 110u}, + {"AMZN", 4u, 4u, 130u}, + {"AMZN", 6u, 5u, 140u}, + })); + } } catch (Exception & e) { @@ -314,23 +330,23 @@ catch (Exception & e) TEST(FullSortingJoin, AsofGeneratedTestData) try { - std::vector join_kinds = {JoinKind::Inner, JoinKind::Left}; + std::array join_kinds{JoinKind::Inner, JoinKind::Left}; auto join_kind = join_kinds[std::uniform_int_distribution(0, join_kinds.size() - 1)(rng)]; - std::vector asof_inequalities = { + std::array asof_inequalities{ ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals, // ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals, }; auto asof_inequality = asof_inequalities[std::uniform_int_distribution(0, asof_inequalities.size() - 1)(rng)]; - auto left_source_builder = SourceChunksBuilder({ + SourceChunksBuilder left_source_builder({ {std::make_shared(), "k1"}, {std::make_shared(), "k2"}, {std::make_shared(), "t"}, {std::make_shared(), "attr"}, }); - auto right_source_builder = SourceChunksBuilder({ + SourceChunksBuilder right_source_builder({ {std::make_shared(), "k1"}, {std::make_shared(), "k2"}, {std::make_shared(), "t"}, From 4a1a7d4c6278246e097cf0583b401aa8ac3775fa Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 25 Sep 2023 10:54:08 +0000 Subject: [PATCH 08/57] Add randomized test FullSortingJoin.Any --- .../Transforms/MergeJoinTransform.cpp | 28 ++- .../Transforms/MergeJoinTransform.h | 5 + .../tests/gtest_full_sorting_join.cpp | 181 ++++++++++++++---- 3 files changed, 166 insertions(+), 48 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index bfde5892289..f5e277ea8c8 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -630,7 +630,7 @@ struct AnyJoinImpl 
FullMergeJoinCursor & right_cursor, PaddedPODArray & left_map, PaddedPODArray & right_map, - AnyJoinState & state, + AnyJoinState & any_join_state, int null_direction_hint) { assert(enabled); @@ -691,21 +691,21 @@ struct AnyJoinImpl } } - /// Remember index of last joined row to propagate it to next block + /// Remember last joined row to propagate it to next block - state.setValue({}); + any_join_state.setValue({}); if (!left_cursor->isValid()) { - state.set(0, left_cursor.cursor); + any_join_state.set(0, left_cursor.cursor); if (cmp == 0 && isLeft(kind)) - state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); + any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); } if (!right_cursor->isValid()) { - state.set(1, right_cursor.cursor); + any_join_state.set(1, right_cursor.cursor); if (cmp == 0 && isRight(kind)) - state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); + any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); } } }; @@ -720,7 +720,6 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState for (size_t source_num = 0; source_num < 2; ++source_num) { auto & current = *cursors[source_num]; - auto & state = any_join_state; if (any_join_state.keys[source_num].equals(current.cursor)) { size_t start_pos = current->getRow(); @@ -728,25 +727,22 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState if (length && isLeft(kind) && source_num == 0) { - if (state.value) - result = copyChunkResized(current.getCurrent(), state.value, start_pos, length); + if (any_join_state.value) + result = copyChunkResized(current.getCurrent(), any_join_state.value, start_pos, length); else result = createBlockWithDefaults(source_num, start_pos, length); } if (length && isRight(kind) && source_num == 1) { - if (state.value) - result = copyChunkResized(state.value, current.getCurrent(), start_pos, length); + if (any_join_state.value) + result = copyChunkResized(any_join_state.value, current.getCurrent(), start_pos, 
length); else result = createBlockWithDefaults(source_num, start_pos, length); } - /// We've found row with other key, no need to skip more rows with current key if (current->isValid()) - { - state.keys[source_num].reset(); - } + any_join_state.keys[source_num].reset(); } else { diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 3ee01e57992..375c9ebd3cc 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -85,6 +85,11 @@ public: keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1); } + void reset(size_t source_num) + { + keys[source_num].reset(); + } + void setValue(Chunk value_) { value = std::move(value_); } bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); } diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index bbd321a78d7..1dc410bd6b8 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -213,13 +213,142 @@ Block executePipeline(QueryPipeline && pipeline) } template -void checkColumn(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) +void assertColumnVectorEq(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) { const auto & actual = assert_cast *>(block.getByName(name).column.get())->getData(); EXPECT_EQ(actual.size(), expected.size()); ASSERT_EQ(actual, expected); } +template +void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) +{ + const ColumnPtr & actual = block.getByName(name).column; + ASSERT_TRUE(checkColumn(*actual)); + ASSERT_TRUE(checkColumn(expected)); + EXPECT_EQ(actual->size(), expected.size()); + + auto dump_val = [](const IColumn & col, size_t i) -> String + { + Field value; + col.get(i, value); + return value.dump(); + }; + + size_t 
num_rows = std::min(actual->size(), expected.size()); + for (size_t i = 0; i < num_rows; ++i) + ASSERT_EQ(actual->compareAt(i, i, expected, 1), 0) << dump_val(*actual, i) << " != " << dump_val(expected, i) << " at row " << i; +} + +template +T getRandomFrom(const std::initializer_list & opts) +{ + std::vector options(opts.begin(), opts.end()); + size_t idx = std::uniform_int_distribution(0, options.size() - 1)(rng); + return options[idx]; +} + +/// Used to have accurate 0.0 and 1.0 probabilities +double getRandomDoubleQuantized(size_t quants = 5) +{ + return std::uniform_int_distribution(0, quants)(rng) / static_cast(quants); +} + +void generateNextKey(UInt64 & k1, String & k2) +{ + size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); + String new_k2 = getRandomASCIIString(str_len, rng); + if (new_k2.compare(k2) <= 0) + ++k1; + k2 = new_k2; +} + +TEST(FullSortingJoin, Any) +try +{ + JoinKind kind = getRandomFrom({JoinKind::Inner, JoinKind::Left, JoinKind::Right}); + + SourceChunksBuilder left_source({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "attr"}, + }); + + left_source.break_prob = getRandomDoubleQuantized(); + right_source.break_prob = getRandomDoubleQuantized(); + + size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng); + + auto expected_left = ColumnString::create(); + auto expected_right = ColumnString::create(); + + UInt64 k1 = 0; + String k2 = ""; + + auto get_attr = [&](const String & side, size_t idx) -> String + { + return toString(k1) + "_" + k2 + "_" + side + "_" + toString(idx); + }; + + for (size_t i = 0; i < num_keys; ++i) + { + generateNextKey(k1, k2); + + /// Key is present in left, right or both tables. Both tables is more probable. + size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng); + + size_t num_rows_left = key_presence == 0 ? 
0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_left; ++j) + left_source.addRow({k1, k2, get_attr("left", j)}); + + size_t num_rows_right = key_presence == 1 ? 0 : std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_rows_right; ++j) + right_source.addRow({k1, k2, get_attr("right", j)}); + + String left_attr = num_rows_left ? get_attr("left", 0) : ""; + String right_attr = num_rows_right ? get_attr("right", 0) : ""; + + if (kind == JoinKind::Inner && num_rows_left && num_rows_right) + { + expected_left->insert(left_attr); + expected_right->insert(right_attr); + } + else if (kind == JoinKind::Left) + { + for (size_t j = 0; j < num_rows_left; ++j) + { + expected_left->insert(get_attr("left", j)); + expected_right->insert(right_attr); + } + } + else if (kind == JoinKind::Right) + { + for (size_t j = 0; j < num_rows_right; ++j) + { + expected_left->insert(left_attr); + expected_right->insert(get_attr("right", j)); + } + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source.build(), right_source.build(), /* key_length = */ 2, + kind, JoinStrictness::Any)); + assertColumnEq(*expected_left, result_block, "t1.attr"); + assertColumnEq(*expected_right, result_block, "t2.attr"); +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + TEST(FullSortingJoin, Asof) try { @@ -259,18 +388,18 @@ try })); } - { - Block result_block = executePipeline(buildJoinPipeline( - left_source.build(), right_source.build(), /* key_length = */ 2, - JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); - auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + // { + // Block result_block = executePipeline(buildJoinPipeline( + // left_source.build(), right_source.build(), /* key_length = */ 2, + // JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); + // auto values = 
getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); - ASSERT_EQ(values, (std::vector>{ - {"AMZN", 3u, 2u, 110u}, - {"AMZN", 4u, 4u, 130u}, - {"AMZN", 6u, 5u, 140u}, - })); - } + // ASSERT_EQ(values, (std::vector>{ + // {"AMZN", 3u, 2u, 110u}, + // {"AMZN", 4u, 4u, 130u}, + // {"AMZN", 6u, 5u, 140u}, + // })); + // } } catch (Exception & e) { @@ -330,14 +459,12 @@ catch (Exception & e) TEST(FullSortingJoin, AsofGeneratedTestData) try { - std::array join_kinds{JoinKind::Inner, JoinKind::Left}; - auto join_kind = join_kinds[std::uniform_int_distribution(0, join_kinds.size() - 1)(rng)]; + auto join_kind = getRandomFrom({JoinKind::Inner, JoinKind::Left}); - std::array asof_inequalities{ + auto asof_inequality = getRandomFrom({ ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals, // ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals, - }; - auto asof_inequality = asof_inequalities[std::uniform_int_distribution(0, asof_inequalities.size() - 1)(rng)]; + }); SourceChunksBuilder left_source_builder({ {std::make_shared(), "k1"}, @@ -353,18 +480,8 @@ try {std::make_shared(), "attr"}, }); - /// uniform_int_distribution to have 0.0 and 1.0 probabilities - left_source_builder.break_prob = std::uniform_int_distribution<>(0, 5)(rng) / 5.0; - right_source_builder.break_prob = std::uniform_int_distribution<>(0, 5)(rng) / 5.0; - - auto get_next_key = [](UInt64 & k1, String & k2) - { - size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); - String new_k2 = getRandomASCIIString(str_len, rng); - if (new_k2.compare(k2) <= 0) - ++k1; - k2 = new_k2; - }; + left_source_builder.break_prob = getRandomDoubleQuantized(); + right_source_builder.break_prob = getRandomDoubleQuantized(); ColumnInt64::Container expected; @@ -416,7 +533,7 @@ try expected.push_back(-10 * left_t); } - get_next_key(k1, k2); + generateNextKey(k1, k2); } Block result_block = executePipeline(buildJoinPipeline( @@ -424,12 +541,12 @@ try /* key_length = */ 3, join_kind, 
JoinStrictness::Asof, asof_inequality)); - checkColumn(expected, result_block, "t1.attr"); + assertColumnVectorEq(expected, result_block, "t1.attr"); for (auto & e : expected) e = e < 0 ? 0 : 10 * e; /// non matched rows from left table have negative attr - checkColumn(expected, result_block, "t2.attr"); + assertColumnVectorEq(expected, result_block, "t2.attr"); } catch (Exception & e) { std::cout << e.getStackTraceString() << std::endl; From 7e0c2d7bcb88f6d8575fc1f0e288e94f13456f87 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 26 Sep 2023 12:25:12 +0000 Subject: [PATCH 09/57] wip full sorting asof join --- .../Transforms/MergeJoinTransform.cpp | 300 +++++++++++++-- .../Transforms/MergeJoinTransform.h | 89 ++--- .../tests/gtest_full_sorting_join.cpp | 346 ++++++++++++++---- .../02276_full_sort_join_unsupported.sql | 2 +- 4 files changed, 575 insertions(+), 162 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index f5e277ea8c8..638f2f1cb10 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -34,8 +34,6 @@ namespace ErrorCodes namespace { -constexpr UInt64 DEFAULT_VALUE_INDEX = std::numeric_limits::max(); - FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, JoinStrictness strictness) { SortDescription desc; @@ -112,7 +110,7 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImp int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs) { - return nullableCompareAt(lhs.getAsofColumn(), rhs.getAsofColumn(), lhs->getRow(), rhs->getRow()); + return nullableCompareAt(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow()); } bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) @@ -250,6 +248,87 @@ void inline addMany(PaddedPODArray & values, UInt64 value, size_t num) } 
+JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos) +{ + row.reserve(cursor->sort_columns.size()); + for (const auto & col : cursor->sort_columns) + { + auto new_col = col->cloneEmpty(); + new_col->insertFrom(*col, pos); + row.push_back(std::move(new_col)); + } + if (auto asof_column = cursor.getAsofColumn()) + { + auto new_col = asof_column->cloneEmpty(); + new_col->insertFrom(*asof_column, pos); + row.push_back(std::move(new_col)); + } +} + +void JoinKeyRow::reset() +{ + row.clear(); +} + +bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const +{ + if (row.empty()) + return false; + + assert(this->row.size() == cursor->sort_columns_size); + for (size_t i = 0; i < cursor->sort_columns_size; ++i) + { + int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction); + if (cmp != 0) + return false; + } + return true; +} + +bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const +{ + if (!equals(cursor)) + return false; + int cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *row.back(), 1); + return (asof_inequality == ASOFJoinInequality::Less && cmp < 0) + || (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0) + || (asof_inequality == ASOFJoinInequality::Greater && cmp > 0) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && cmp >= 0); +} + +void AnyJoinState::set(size_t source_num, const FullMergeJoinCursor & cursor) +{ + assert(cursor->rows); + keys[source_num] = JoinKeyRow(cursor, cursor->rows - 1); +} + +void AnyJoinState::reset(size_t source_num) +{ + keys[source_num].reset(); + value.clear(); +} + +void AnyJoinState::setValue(Chunk value_) +{ + value = std::move(value_); +} + +bool AnyJoinState::empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + + +void AsofJoinState::set(const FullMergeJoinCursor & rcursor, size_t rpos) +{ + key = JoinKeyRow(rcursor, rpos); + value = 
rcursor.getCurrent().clone(); + value_row = rpos; +} + +void AsofJoinState::reset() +{ + key.reset(); + value.clear(); +} + const Chunk & FullMergeJoinCursor::getCurrent() const { return current_chunk; @@ -282,6 +361,31 @@ bool FullMergeJoinCursor::fullyCompleted() const return !cursor.isValid() && recieved_all_blocks; } +String FullMergeJoinCursor::dump() const +{ + Strings row_dump; + if (cursor.isValid()) + { + Field val; + for (size_t i = 0; i < cursor.sort_columns_size; ++i) + { + cursor.sort_columns[i]->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + + if (auto * asof_column = getAsofColumn()) + { + asof_column->get(cursor.getRow(), val); + row_dump.push_back(val.dump()); + } + } + + return fmt::format("<{}/{}{}>[{}]", + cursor.getRow(), cursor.rows, + recieved_all_blocks ? "(finished)" : "", + fmt::join(row_dump, ", ")); +} + MergeJoinAlgorithm::MergeJoinAlgorithm( JoinKind kind_, JoinStrictness strictness_, @@ -456,7 +560,7 @@ struct AllJoinImpl else { assert(state == nullptr); - state = std::make_unique(left_cursor.cursor, lpos, right_cursor.cursor, rpos); + state = std::make_unique(left_cursor, lpos, right_cursor, rpos); state->addRange(0, left_cursor.getCurrent().clone(), lpos, lnum); state->addRange(1, right_cursor.getCurrent().clone(), rpos, rnum); return; @@ -501,6 +605,17 @@ void dispatchKind(JoinKind kind, Args && ... 
args) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported join kind: \"{}\"", kind); } +MutableColumns MergeJoinAlgorithm::getEmptyResultColumns() const +{ + MutableColumns result_cols; + for (size_t i = 0; i < 2; ++i) + { + for (const auto & col : cursors[i]->sampleColumns()) + result_cols.push_back(col->cloneEmpty()); + } + return result_cols; +} + std::optional MergeJoinAlgorithm::handleAllJoinState() { if (all_join_state && all_join_state->finished()) @@ -514,7 +629,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState /// Accumulate blocks with same key in all_join_state for (size_t i = 0; i < 2; ++i) { - if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(cursors[i]->cursor)) + if (cursors[i]->cursor.isValid() && all_join_state->keys[i].equals(*cursors[i])) { size_t pos = cursors[i]->cursor.getRow(); size_t num = nextDistinct(cursors[i]->cursor); @@ -534,12 +649,7 @@ std::optional MergeJoinAlgorithm::handleAllJoinState stat.max_blocks_loaded = std::max(stat.max_blocks_loaded, all_join_state->blocksStored()); /// join all rows with current key - MutableColumns result_cols; - for (size_t i = 0; i < 2; ++i) - { - for (const auto & col : cursors[i]->sampleColumns()) - result_cols.push_back(col->cloneEmpty()); - } + MutableColumns result_cols = getEmptyResultColumns(); size_t total_rows = 0; while (!max_block_size || total_rows < max_block_size) @@ -567,6 +677,52 @@ std::optional MergeJoinAlgorithm::handleAllJoinState return {}; } +std::optional MergeJoinAlgorithm::handleAsofJoinState() +{ + if (strictness != JoinStrictness::Asof) + return {}; + + if (!cursors[1]->fullyCompleted()) + return {}; + + auto & left_cursor = *cursors[0]; + size_t lpos = left_cursor->getRow(); + const auto & left_columns = left_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = getEmptyResultColumns(); + + while (left_cursor->isValid() && asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + size_t i = 0; + for (const auto & col : 
left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + } + + while (isLeft(kind) && left_cursor->isValid()) + { + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + + left_cursor->next(); + } + + size_t result_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + if (result_rows) + return Status(Chunk(std::move(result_cols), result_rows)); + + return {}; +} + + MergeJoinAlgorithm::Status MergeJoinAlgorithm::allJoin() { PaddedPODArray idx_map[2]; @@ -696,14 +852,14 @@ struct AnyJoinImpl any_join_state.setValue({}); if (!left_cursor->isValid()) { - any_join_state.set(0, left_cursor.cursor); + any_join_state.set(0, left_cursor); if (cmp == 0 && isLeft(kind)) any_join_state.setValue(getRowFromChunk(right_cursor.getCurrent(), rpos)); } if (!right_cursor->isValid()) { - any_join_state.set(1, right_cursor.cursor); + any_join_state.set(1, right_cursor); if (cmp == 0 && isRight(kind)) any_join_state.setValue(getRowFromChunk(left_cursor.getCurrent(), lpos)); } @@ -720,7 +876,7 @@ std::optional MergeJoinAlgorithm::handleAnyJoinState for (size_t source_num = 0; source_num < 2; ++source_num) { auto & current = *cursors[source_num]; - if (any_join_state.keys[source_num].equals(current.cursor)) + if (any_join_state.keys[source_num].equals(current)) { size_t start_pos = current->getRow(); size_t length = nextDistinct(current.cursor); @@ -811,24 +967,35 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() if (!right_cursor->isValid()) return Status(1); - PaddedPODArray left_map; - PaddedPODArray right_map; + const auto & left_columns = 
left_cursor.getCurrent().getColumns(); + const auto & right_columns = right_cursor.getCurrent().getColumns(); + + MutableColumns result_cols = getEmptyResultColumns(); while (left_cursor->isValid() && right_cursor->isValid()) { auto lpos = left_cursor->getRow(); auto rpos = right_cursor->getRow(); auto cmp = compareCursors(*left_cursor, *right_cursor); + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ({}) <=> ({}) -> {}", __FILE__, __LINE__, left_cursor.dump(), right_cursor.dump(), cmp); + if (cmp == 0) { auto asof_cmp = compareAsofCursors(left_cursor, right_cursor); + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ({}) <=> ({}) -> asof {}", __FILE__, __LINE__, left_cursor.dump(), right_cursor.dump(), asof_cmp); + if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) { /// First row in right table that is greater (or equal) than current row in left table /// matches asof join condition the best - left_map.push_back(lpos); - right_map.push_back(rpos); + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : right_columns) + result_cols[i++]->insertFrom(*col, rpos); + chassert(i == result_cols.size()); + left_cursor->next(); continue; } @@ -839,39 +1006,99 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() right_cursor->next(); continue; } + + if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1) + || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0)) + { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + /// condition is satisfied, remember this row and move next to try to find better match + asof_join_state.set(right_cursor, rpos); + right_cursor->next(); + continue; + } + + if (asof_inequality == ASOFJoinInequality::Greater || asof_inequality == ASOFJoinInequality::GreaterOrEquals) + { + /// Asof condition is not satisfied anymore, 
use last matched row from right table + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + } + else + { + asof_join_state.reset(); + if (isLeft(kind)) + { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + + /// return row with default values at right side + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertDefault(); + chassert(i == result_cols.size()); + } + } + left_cursor->next(); + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + continue; + } + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO: implement ASOF equality join"); } else if (cmp < 0) { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + if (asof_join_state.hasMatch(left_cursor, asof_inequality)) + { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertFrom(*col, lpos); + for (const auto & col : asof_join_state.value.getColumns()) + result_cols[i++]->insertFrom(*col, asof_join_state.value_row); + chassert(i == result_cols.size()); + left_cursor->next(); + continue; + } + else + { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + asof_join_state.reset(); + } + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + /// no matches for rows in left table, just pass them through size_t num = nextDistinct(*left_cursor); - if (isLeft(kind)) + if (isLeft(kind) && num) { /// return them with default values at right side - 
addRange(left_map, lpos, lpos + num); - addMany(right_map, DEFAULT_VALUE_INDEX, num); + size_t i = 0; + for (const auto & col : left_columns) + result_cols[i++]->insertRangeFrom(*col, lpos, num); + for (; i < result_cols.size(); ++i) + result_cols[i]->insertManyDefaults(num); + chassert(i == result_cols.size()); } } else { + // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); + /// skip rows in right table until we find match for current row in left table nextDistinct(*right_cursor); } } - - chassert(left_map.size() == right_map.size()); - Chunk result; - { - Columns lcols = indexColumns(left_cursor.getCurrent().getColumns(), left_map); - for (auto & col : lcols) - result.addColumn(std::move(col)); - - Columns rcols = indexColumns(right_cursor.getCurrent().getColumns(), right_map); - for (auto & col : rcols) - result.addColumn(std::move(col)); - } - - return Status(std::move(result)); + size_t num_rows = result_cols.empty() ? 0 : result_cols.front()->size(); + return Status(Chunk(std::move(result_cols), num_rows)); } @@ -929,6 +1156,9 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() return std::move(*result); } + if (auto result = handleAsofJoinState()) + return std::move(*result); + if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) { if (!cursors[0]->fullyCompleted() && isLeftOrFull(kind)) diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 375c9ebd3cc..dbdda0b166b 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -37,62 +37,28 @@ using FullMergeJoinCursorPtr = std::unique_ptr; /// Used instead of storing previous block struct JoinKeyRow { - std::vector row; - JoinKeyRow() = default; - explicit JoinKeyRow(const SortCursorImpl & impl_, size_t pos) - { - row.reserve(impl_.sort_columns.size()); - for (const auto & col : impl_.sort_columns) - { - auto new_col = col->cloneEmpty(); - 
new_col->insertFrom(*col, pos); - row.push_back(std::move(new_col)); - } - } + JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos); - void reset() - { - row.clear(); - } + bool equals(const FullMergeJoinCursor & cursor) const; + bool asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const; - bool equals(const SortCursorImpl & impl) const - { - if (row.empty()) - return false; + void reset(); - assert(this->row.size() == impl.sort_columns_size); - for (size_t i = 0; i < impl.sort_columns_size; ++i) - { - int cmp = this->row[i]->compareAt(0, impl.getRow(), *impl.sort_columns[i], impl.desc[i].nulls_direction); - if (cmp != 0) - return false; - } - return true; - } + std::vector row; }; /// Remembers previous key if it was joined in previous block class AnyJoinState : boost::noncopyable { public: - AnyJoinState() = default; + void set(size_t source_num, const FullMergeJoinCursor & cursor); + void setValue(Chunk value_); - void set(size_t source_num, const SortCursorImpl & cursor) - { - assert(cursor.rows); - keys[source_num] = JoinKeyRow(cursor, cursor.rows - 1); - } + void reset(size_t source_num); - void reset(size_t source_num) - { - keys[source_num].reset(); - } - - void setValue(Chunk value_) { value = std::move(value_); } - - bool empty() const { return keys[0].row.empty() && keys[1].row.empty(); } + bool empty() const; /// current keys JoinKeyRow keys[2]; @@ -125,8 +91,8 @@ public: Chunk chunk; }; - AllJoinState(const SortCursorImpl & lcursor, size_t lpos, - const SortCursorImpl & rcursor, size_t rpos) + AllJoinState(const FullMergeJoinCursor & lcursor, size_t lpos, + const FullMergeJoinCursor & rcursor, size_t rpos) : keys{JoinKeyRow(lcursor, lpos), JoinKeyRow(rcursor, rpos)} { } @@ -194,6 +160,25 @@ private: size_t ridx = 0; }; + +class AsofJoinState : boost::noncopyable +{ +public: + void set(const FullMergeJoinCursor & rcursor, size_t rpos); + void reset(); + + bool hasMatch(const FullMergeJoinCursor & cursor, 
ASOFJoinInequality asof_inequality) + { + if (value.empty()) + return false; + return key.asofMatch(cursor, asof_inequality); + } + + JoinKeyRow key; + Chunk value; + size_t value_row = 0; +}; + /* * Wrapper for SortCursorImpl */ @@ -239,11 +224,15 @@ public: const Block & sampleBlock() const { return sample_block; } Columns sampleColumns() const { return sample_block.getColumns(); } - const IColumn & getAsofColumn() const + const IColumn * getAsofColumn() const { - return *cursor.all_columns[asof_column_position]; + if (!asof_column_position) + return nullptr; + return cursor.all_columns[*asof_column_position]; } + String dump() const; + private: Block sample_block; SortDescription desc; @@ -251,7 +240,7 @@ private: Chunk current_chunk; bool recieved_all_blocks = false; - size_t asof_column_position; + std::optional asof_column_position; }; /* @@ -284,12 +273,13 @@ private: std::optional handleAllJoinState(); Status allJoin(); + std::optional handleAsofJoinState(); Status asofJoin(); + MutableColumns getEmptyResultColumns() const; Chunk createBlockWithDefaults(size_t source_num); Chunk createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const; - /// For `USING` join key columns should have values from right side instead of defaults std::unordered_map left_to_right_key_remap; @@ -299,6 +289,7 @@ private: /// Keep some state to make handle data from different blocks AnyJoinState any_join_state; std::unique_ptr all_join_state; + AsofJoinState asof_join_state; JoinKind kind; JoinStrictness strictness; diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 1dc410bd6b8..4e0727779b7 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -18,19 +18,53 @@ #include #include +#include +#include + + #include using namespace DB; +namespace +{ + +[[ maybe_unused ]] +String dumpBlock(std::shared_ptr source) +{ + 
WriteBufferFromOwnString buf; + { + Block header = source->getPort().getHeader(); + QueryPipeline pipeline(source); + auto format = std::make_shared(buf, header, FormatSettings{}, false); + pipeline.complete(std::move(format)); + + CompletedPipelineExecutor executor(pipeline); + executor.execute(); + } + return buf.str(); +} + +[[ maybe_unused ]] +String dumpBlock(const Block & block) +{ + Block header = block.cloneEmpty(); + Chunk data(block.getColumns(), block.rows()); + auto source = std::make_shared(header, std::move(data)); + return dumpBlock(std::move(source)); +} + UInt64 getAndPrintRandomSeed() { UInt64 seed = randomSeed(); - std::cerr << __FILE__ << "::" << "TEST_RANDOM_SEED = " << seed << "ull" << std::endl; + if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) + seed = std::stoull(random_seed); + + std::cerr << __FILE__ << " :: " << "TEST_RANDOM_SEED=" << seed << std::endl; return seed; } static UInt64 TEST_RANDOM_SEED = getAndPrintRandomSeed(); - static pcg64 rng(TEST_RANDOM_SEED); @@ -94,42 +128,29 @@ QueryPipeline buildJoinPipeline( std::shared_ptr oneColumnSource(const std::vector> & values) { - Block header = { ColumnWithTypeAndName(std::make_shared(), "x") }; + Block header = { + ColumnWithTypeAndName(std::make_shared(), "key"), + ColumnWithTypeAndName(std::make_shared(), "idx"), + }; + + UInt64 idx = 0; Chunks chunks; for (const auto & chunk_values : values) { - auto column = ColumnUInt64::create(); + auto key_column = ColumnUInt64::create(); + auto idx_column = ColumnUInt64::create(); + for (auto n : chunk_values) - column->insertValue(n); - chunks.emplace_back(Chunk(Columns{std::move(column)}, chunk_values.size())); + { + key_column->insertValue(n); + idx_column->insertValue(idx); + ++idx; + } + chunks.emplace_back(Chunk(Columns{std::move(key_column), std::move(idx_column)}, chunk_values.size())); } return std::make_shared(header, std::move(chunks)); } - -TEST(FullSortingJoin, Simple) -try -{ - auto 
left_source = oneColumnSource({ {1, 2, 3, 4, 5} }); - auto right_source = oneColumnSource({ {1}, {2}, {3}, {4}, {5} }); - - auto pipeline = buildJoinPipeline(left_source, right_source); - PullingPipelineExecutor executor(pipeline); - - Block block; - - size_t total_result_rows = 0; - while (executor.pull(block)) - total_result_rows += block.rows(); - - ASSERT_EQ(total_result_rows, 5); -} -catch (Exception & e) -{ - std::cout << e.getStackTraceString() << std::endl; - throw; -} - class SourceChunksBuilder { public: @@ -163,7 +184,7 @@ public: return; } - std::shared_ptr build() + std::shared_ptr getSource() { addChunk(); @@ -215,9 +236,11 @@ Block executePipeline(QueryPipeline && pipeline) template void assertColumnVectorEq(const typename ColumnVector::Container & expected, const Block & block, const std::string & name) { - const auto & actual = assert_cast *>(block.getByName(name).column.get())->getData(); - EXPECT_EQ(actual.size(), expected.size()); - ASSERT_EQ(actual, expected); + const auto * actual = typeid_cast *>(block.getByName(name).column.get()); + ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector).name(); + + EXPECT_EQ(actual->getData().size(), expected.size()); + ASSERT_EQ(actual->getData(), expected) << "column name: " << name; } template @@ -263,7 +286,90 @@ void generateNextKey(UInt64 & k1, String & k2) k2 = new_k2; } -TEST(FullSortingJoin, Any) +bool isStrict(ASOFJoinInequality inequality) +{ + return inequality == ASOFJoinInequality::Less || inequality == ASOFJoinInequality::Greater; +} + +} + +TEST(FullSortingJoin, AllAnyOneKey) +try +{ + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::All)); + + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + 
assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {1, 2, 3, 4, 5} }), + oneColumnSource({ {1}, {2}, {3}, {4}, {5} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner All"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::All)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 0, 1, 2, 3, 3, 4, 5}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 4, 4, 4, 3, 4, 5, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + SCOPED_TRACE("Inner Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Inner, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 4}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 5}), result, "t2.idx"); + } + { + + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2}, {2, 3}, {3, 5} }), + oneColumnSource({ {1, 1, 1}, {2, 2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + 
assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } + { + SCOPED_TRACE("Left Any"); + Block result = executePipeline(buildJoinPipeline( + oneColumnSource({ {2, 2, 2, 2}, {3}, {3, 5} }), + oneColumnSource({ {1, 1, 1, 2}, {2}, {3, 4} }), + 1, JoinKind::Left, JoinStrictness::Any)); + assertColumnVectorEq(ColumnUInt64::Container({0, 1, 2, 3, 4, 5, 6}), result, "t1.idx"); + assertColumnVectorEq(ColumnUInt64::Container({3, 3, 3, 3, 5, 5, 0}), result, "t2.idx"); + } +} +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + + +TEST(FullSortingJoin, AnyRandomized) try { JoinKind kind = getRandomFrom({JoinKind::Inner, JoinKind::Left, JoinKind::Right}); @@ -288,7 +394,7 @@ try auto expected_left = ColumnString::create(); auto expected_right = ColumnString::create(); - UInt64 k1 = 0; + UInt64 k1 = 1; String k2 = ""; auto get_attr = [&](const String & side, size_t idx) -> String @@ -338,7 +444,7 @@ try } Block result_block = executePipeline(buildJoinPipeline( - left_source.build(), right_source.build(), /* key_length = */ 2, + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, kind, JoinStrictness::Any)); assertColumnEq(*expected_left, result_block, "t1.attr"); assertColumnEq(*expected_right, result_block, "t2.attr"); @@ -356,10 +462,10 @@ try {std::make_shared(), "key"}, {std::make_shared(), "t"}, }); - left_source.addRow({"AMZN", 3}); left_source.addRow({"AMZN", 4}); left_source.addRow({"AMZN", 6}); + left_source.addRow({"SBUX", 10}); SourceChunksBuilder right_source({ {std::make_shared(), "key"}, @@ -371,14 +477,19 @@ try right_source.addRow({"AAPL", 2, 98}); right_source.addRow({"AAPL", 3, 99}); right_source.addRow({"AMZN", 1, 100}); + right_source.addRow({"AMZN", 2, 0}); + right_source.addChunk(); right_source.addRow({"AMZN", 2, 110}); right_source.addChunk(); right_source.addRow({"AMZN", 4, 130}); right_source.addRow({"AMZN", 5, 140}); + right_source.addRow({"SBUX", 8, 
180}); + right_source.addChunk(); + right_source.addRow({"SBUX", 9, 190}); { Block result_block = executePipeline(buildJoinPipeline( - left_source.build(), right_source.build(), /* key_length = */ 2, + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::LessOrEquals)); auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); @@ -388,18 +499,19 @@ try })); } - // { - // Block result_block = executePipeline(buildJoinPipeline( - // left_source.build(), right_source.build(), /* key_length = */ 2, - // JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); - // auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); + { + Block result_block = executePipeline(buildJoinPipeline( + left_source.getSource(), right_source.getSource(), /* key_length = */ 2, + JoinKind::Inner, JoinStrictness::Asof, ASOFJoinInequality::GreaterOrEquals)); + auto values = getValuesFromBlock(result_block, {"t1.key", "t1.t", "t2.t", "t2.value"}); - // ASSERT_EQ(values, (std::vector>{ - // {"AMZN", 3u, 2u, 110u}, - // {"AMZN", 4u, 4u, 130u}, - // {"AMZN", 6u, 5u, 140u}, - // })); - // } + ASSERT_EQ(values, (std::vector>{ + {"AMZN", 3u, 2u, 110u}, + {"AMZN", 4u, 4u, 130u}, + {"AMZN", 6u, 5u, 140u}, + {"SBUX", 10u, 9u, 190u}, + })); + } } catch (Exception & e) { @@ -418,8 +530,7 @@ try {std::make_shared(), "value"}, }); - UInt64 p = std::uniform_int_distribution<>(0, 2)(rng); - double break_prob = p == 0 ? 0.0 : (p == 1 ? 
0.5 : 1.0); + double break_prob = getRandomDoubleQuantized(2); std::uniform_real_distribution<> prob_dis(0.0, 1.0); for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) { @@ -427,7 +538,7 @@ try if (prob_dis(rng) < break_prob) right_source_builder.addChunk(); } - auto right_source = right_source_builder.build(); + auto right_source = right_source_builder.getSource(); auto pipeline = buildJoinPipeline( left_source, right_source, /* key_length = */ 1, @@ -436,7 +547,7 @@ try Block result_block = executePipeline(std::move(pipeline)); ASSERT_EQ( - assert_cast(result_block.getByName("t1.x").column.get())->getData(), + assert_cast(result_block.getByName("t1.key").column.get())->getData(), (ColumnUInt64::Container{3, 3, 3, 3, 3, 5, 5, 6, 9, 9, 10}) ); @@ -456,15 +567,14 @@ catch (Exception & e) throw; } -TEST(FullSortingJoin, AsofGeneratedTestData) +TEST(FullSortingJoin, AsofLessGeneratedTestData) try { - auto join_kind = getRandomFrom({JoinKind::Inner, JoinKind::Left}); + auto join_kind = getRandomFrom({ JoinKind::Inner, JoinKind::Left }); - auto asof_inequality = getRandomFrom({ - ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals, - // ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals, - }); + auto asof_inequality = getRandomFrom({ ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); SourceChunksBuilder left_source_builder({ {std::make_shared(), "k1"}, @@ -485,11 +595,13 @@ try ColumnInt64::Container expected; - UInt64 k1 = 0; - String k2 = "asdfg"; + UInt64 k1 = 1; + String k2 = ""; auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { + generateNextKey(k1, k2); + Int64 left_t = 0; size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); for (size_t i = 0; i < num_left_rows; ++i) @@ -504,18 +616,10 @@ try auto right_t = left_t; for 
(size_t j = 0; j < num_matches; ++j) { - int min_step = 1; - if (asof_inequality == ASOFJoinInequality::LessOrEquals || asof_inequality == ASOFJoinInequality::GreaterOrEquals) - min_step = 0; + int min_step = isStrict(asof_inequality) ? 1 : 0; right_t += std::uniform_int_distribution<>(min_step, 3)(rng); - bool is_match = false; - - if (asof_inequality == ASOFJoinInequality::LessOrEquals || asof_inequality == ASOFJoinInequality::Less) - is_match = j == 0; - else if (asof_inequality == ASOFJoinInequality::GreaterOrEquals || asof_inequality == ASOFJoinInequality::Greater) - is_match = j == num_matches - 1; - + bool is_match = j == 0; right_source_builder.addRow({k1, k2, right_t, is_match ? 100 * left_t : -1}); } /// next left_t should be greater than right_t not to match with previous rows @@ -523,7 +627,7 @@ try } /// generate some rows with greater left_t to check that they are not matched - num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); + num_left_rows = std::bernoulli_distribution(0.5)(rng) ? 
std::uniform_int_distribution<>(1, 100)(rng) : 0; for (size_t i = 0; i < num_left_rows; ++i) { left_t += std::uniform_int_distribution<>(1, 10)(rng); @@ -532,12 +636,10 @@ try if (join_kind == JoinKind::Left) expected.push_back(-10 * left_t); } - - generateNextKey(k1, k2); } Block result_block = executePipeline(buildJoinPipeline( - left_source_builder.build(), right_source_builder.build(), + left_source_builder.getSource(), right_source_builder.getSource(), /* key_length = */ 3, join_kind, JoinStrictness::Asof, asof_inequality)); @@ -548,7 +650,97 @@ try assertColumnVectorEq(expected, result_block, "t2.attr"); } -catch (Exception & e) { +catch (Exception & e) +{ + std::cout << e.getStackTraceString() << std::endl; + throw; +} + +TEST(FullSortingJoin, AsofGreaterGeneratedTestData) +try +{ + auto join_kind = getRandomFrom({ JoinKind::Inner, JoinKind::Left }); + + auto asof_inequality = getRandomFrom({ ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); + + SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + + SourceChunksBuilder left_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + SourceChunksBuilder right_source_builder({ + {std::make_shared(), "k1"}, + {std::make_shared(), "k2"}, + {std::make_shared(), "t"}, + {std::make_shared(), "attr"}, + }); + + left_source_builder.break_prob = getRandomDoubleQuantized(); + right_source_builder.break_prob = getRandomDoubleQuantized(); + + ColumnInt64::Container expected; + + UInt64 k1 = 1; + String k2 = ""; + UInt64 left_t = 0; + + auto key_num_total = std::uniform_int_distribution<>(1, 100)(rng); + for (size_t key_num = 0; key_num < key_num_total; ++key_num) + { + generateNextKey(k1, k2); + + /// generate some rows with smaller left_t to check that they are not matched + size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? 
std::uniform_int_distribution<>(1, 10)(rng) : 0; + for (size_t i = 0; i < num_left_rows; ++i) + { + left_t += std::uniform_int_distribution<>(1, 10)(rng); + left_source_builder.addRow({k1, k2, left_t, -10 * left_t}); + + if (join_kind == JoinKind::Left) + expected.push_back(-10 * left_t); + } + + if (std::bernoulli_distribution(0.1)(rng)) + continue; + + size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng); + auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng); + for (size_t j = 0; j < num_right_matches; ++j) + { + right_t += std::uniform_int_distribution<>(0, 3)(rng); + bool is_match = j == num_right_matches - 1; + right_source_builder.addRow({k1, k2, right_t, is_match ? 100 * right_t : -1}); + } + + /// next left_t should be greater than (or equals) right_t to match with previous rows + left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 10)(rng); + size_t num_left_matches = std::uniform_int_distribution<>(1, 10)(rng); + for (size_t j = 0; j < num_left_matches; ++j) + { + left_t += std::uniform_int_distribution<>(0, 3)(rng); + left_source_builder.addRow({k1, k2, left_t, 10 * right_t}); + expected.push_back(10 * right_t); + } + } + + Block result_block = executePipeline(buildJoinPipeline( + left_source_builder.getSource(), right_source_builder.getSource(), + /* key_length = */ 3, + join_kind, JoinStrictness::Asof, asof_inequality)); + + assertColumnVectorEq(expected, result_block, "t1.attr"); + + for (auto & e : expected) + e = e < 0 ? 
0 : 10 * e; /// non matched rows from left table have negative attr + + assertColumnVectorEq(expected, result_block, "t2.attr"); +} +catch (Exception & e) +{ std::cout << e.getStackTraceString() << std::endl; throw; } diff --git a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql index a4e60ff54dd..03936107563 100644 --- a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql +++ b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql @@ -19,7 +19,7 @@ SELECT * FROM t1 ANTI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENT SELECT * FROM t1 SEMI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENTED } -SELECT * FROM t1 ASOF JOIN t2 ON t1.key = t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED } +-- SELECT * FROM t1 ASOF JOIN t2 ON t1.key = t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED } SELECT * FROM t1 ANY JOIN t2 ON t1.key = t2.key SETTINGS any_join_distinct_right_table_keys = 1; -- { serverError NOT_IMPLEMENTED } From b6b55cfb187ddcbb6316d905c60d9be481bfb1a4 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 27 Sep 2023 09:05:50 +0000 Subject: [PATCH 10/57] fix asof join --- src/Processors/Transforms/MergeJoinTransform.cpp | 2 +- src/Processors/tests/gtest_full_sorting_join.cpp | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 638f2f1cb10..ad564557c36 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -1171,7 +1171,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() } /// check if blocks are not intersecting at all - if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0) + if (int cmp = totallyCompare(cursors[0]->cursor, cursors[1]->cursor, null_direction_hint); cmp != 0 && 
strictness != JoinStrictness::Asof) { if (cmp < 0) { diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 4e0727779b7..2ecf7805df4 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -30,13 +30,13 @@ namespace { [[ maybe_unused ]] -String dumpBlock(std::shared_ptr source) +String dumpBlockSource(std::shared_ptr source, bool mono_block = false) { WriteBufferFromOwnString buf; { Block header = source->getPort().getHeader(); QueryPipeline pipeline(source); - auto format = std::make_shared(buf, header, FormatSettings{}, false); + auto format = std::make_shared(buf, header, FormatSettings{}, mono_block); pipeline.complete(std::move(format)); CompletedPipelineExecutor executor(pipeline); @@ -51,7 +51,7 @@ String dumpBlock(const Block & block) Block header = block.cloneEmpty(); Chunk data(block.getColumns(), block.rows()); auto source = std::make_shared(header, std::move(data)); - return dumpBlock(std::move(source)); + return dumpBlockSource(std::move(source)); } UInt64 getAndPrintRandomSeed() @@ -732,6 +732,10 @@ try /* key_length = */ 3, join_kind, JoinStrictness::Asof, asof_inequality)); + // std::cerr << "============ left ============" << std::endl << dumpBlockSource(left_source_builder.getSource()) << std::endl; + // std::cerr << "============ right ============" << std::endl << dumpBlockSource(right_source_builder.getSource()) << std::endl; + // std::cerr << "============ result ============" << std::endl << dumpBlock(result_block) << std::endl; + assertColumnVectorEq(expected, result_block, "t1.attr"); for (auto & e : expected) From d015a023bb85c72c2747cebed12d065d5aad1159 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 27 Sep 2023 09:56:30 +0000 Subject: [PATCH 11/57] unique rng seed per each test in gtest_full_sorting_join --- .../tests/gtest_full_sorting_join.cpp | 122 ++++++++++-------- 1 file changed, 69 insertions(+), 53 
deletions(-) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 2ecf7805df4..2bd3357eff9 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -54,20 +54,6 @@ String dumpBlock(const Block & block) return dumpBlockSource(std::move(source)); } -UInt64 getAndPrintRandomSeed() -{ - UInt64 seed = randomSeed(); - if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) - seed = std::stoull(random_seed); - - std::cerr << __FILE__ << " :: " << "TEST_RANDOM_SEED=" << seed << std::endl; - return seed; -} - -static UInt64 TEST_RANDOM_SEED = getAndPrintRandomSeed(); -static pcg64 rng(TEST_RANDOM_SEED); - - QueryPipeline buildJoinPipeline( std::shared_ptr left_source, std::shared_ptr right_source, @@ -154,7 +140,6 @@ std::shared_ptr oneColumnSource(const std::vector> class SourceChunksBuilder { public: - double break_prob = 0.0; explicit SourceChunksBuilder(const Block & header_) : header(header_) @@ -163,13 +148,20 @@ public: chassert(!current_chunk.empty()); } + void setBreakProbability(pcg64 & rng_) + { + /// random probability with possibility to have exact 0.0 and 1.0 values + break_prob = std::uniform_int_distribution(0, 5)(rng_) / static_cast(5); + rng = &rng_; + } + void addRow(const std::vector & row) { chassert(row.size() == current_chunk.size()); for (size_t i = 0; i < current_chunk.size(); ++i) current_chunk[i]->insert(row[i]); - if (break_prob > 0.0 && std::uniform_real_distribution<>(0.0, 1.0)(rng) < break_prob) + if (rng && std::uniform_real_distribution<>(0.0, 1.0)(*rng) < break_prob) addChunk(); } @@ -200,6 +192,9 @@ private: Block header; Chunks chunks; MutableColumns current_chunk; + + pcg64 * rng = nullptr; + double break_prob = 0.0; }; @@ -239,8 +234,20 @@ void assertColumnVectorEq(const typename ColumnVector::Container & expected, const auto * actual = typeid_cast 
*>(block.getByName(name).column.get()); ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector).name(); + auto get_first_diff = [&]() -> String + { + const auto & actual_data = actual->getData(); + size_t num_rows = std::min(expected.size(), actual_data.size()); + for (size_t i = 0; i < num_rows; ++i) + { + if (expected[i] != actual_data[i]) + return fmt::format(", expected: {}, actual: {} at row {}", expected[i], actual_data[i], i); + } + return ""; + }; + EXPECT_EQ(actual->getData().size(), expected.size()); - ASSERT_EQ(actual->getData(), expected) << "column name: " << name; + ASSERT_EQ(actual->getData(), expected) << "column name: " << name << get_first_diff(); } template @@ -264,20 +271,14 @@ void assertColumnEq(const IColumn & expected, const Block & block, const std::st } template -T getRandomFrom(const std::initializer_list & opts) +T getRandomFrom(pcg64 & rng, const std::initializer_list & opts) { std::vector options(opts.begin(), opts.end()); size_t idx = std::uniform_int_distribution(0, options.size() - 1)(rng); return options[idx]; } -/// Used to have accurate 0.0 and 1.0 probabilities -double getRandomDoubleQuantized(size_t quants = 5) -{ - return std::uniform_int_distribution(0, quants)(rng) / static_cast(quants); -} - -void generateNextKey(UInt64 & k1, String & k2) +void generateNextKey(pcg64 & rng, UInt64 & k1, String & k2) { size_t str_len = std::uniform_int_distribution<>(1, 10)(rng); String new_k2 = getRandomASCIIString(str_len, rng); @@ -293,6 +294,28 @@ bool isStrict(ASOFJoinInequality inequality) } +class FullSortingJoinRandomized : public ::testing::Test +{ +public: + FullSortingJoinRandomized() = default; + + void SetUp() override + { + UInt64 seed = randomSeed(); + if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) + seed = std::stoull(random_seed); + + std::cerr << "TEST_RANDOM_SEED=" << seed << std::endl; + rng 
= pcg64(seed); + } + + void TearDown() override + { + } + + pcg64 rng; +}; + TEST(FullSortingJoin, AllAnyOneKey) try { @@ -369,10 +392,10 @@ catch (Exception & e) } -TEST(FullSortingJoin, AnyRandomized) +TEST_F(FullSortingJoinRandomized, Any) try { - JoinKind kind = getRandomFrom({JoinKind::Inner, JoinKind::Left, JoinKind::Right}); + JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right}); SourceChunksBuilder left_source({ {std::make_shared(), "k1"}, @@ -386,8 +409,8 @@ try {std::make_shared(), "attr"}, }); - left_source.break_prob = getRandomDoubleQuantized(); - right_source.break_prob = getRandomDoubleQuantized(); + left_source.setBreakProbability(rng); + right_source.setBreakProbability(rng); size_t num_keys = std::uniform_int_distribution<>(100, 1000)(rng); @@ -404,7 +427,7 @@ try for (size_t i = 0; i < num_keys; ++i) { - generateNextKey(k1, k2); + generateNextKey(rng, k1, k2); /// Key is present in left, right or both tables. Both tables is more probable. 
size_t key_presence = std::uniform_int_distribution<>(0, 10)(rng); @@ -520,7 +543,7 @@ catch (Exception & e) } -TEST(FullSortingJoin, AsofOnlyColumn) +TEST_F(FullSortingJoinRandomized, AsofOnlyColumn) try { auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); @@ -530,14 +553,11 @@ try {std::make_shared(), "value"}, }); - double break_prob = getRandomDoubleQuantized(2); - std::uniform_real_distribution<> prob_dis(0.0, 1.0); + right_source_builder.setBreakProbability(rng); + for (const auto & row : std::vector>{ {1, 101}, {2, 102}, {4, 104}, {5, 105}, {11, 111}, {15, 115} }) - { right_source_builder.addRow(row); - if (prob_dis(rng) < break_prob) - right_source_builder.addChunk(); - } + auto right_source = right_source_builder.getSource(); auto pipeline = buildJoinPipeline( @@ -567,12 +587,12 @@ catch (Exception & e) throw; } -TEST(FullSortingJoin, AsofLessGeneratedTestData) +TEST_F(FullSortingJoinRandomized, AsofLessGeneratedTestData) try { - auto join_kind = getRandomFrom({ JoinKind::Inner, JoinKind::Left }); + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); - auto asof_inequality = getRandomFrom({ ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); @@ -590,8 +610,8 @@ try {std::make_shared(), "attr"}, }); - left_source_builder.break_prob = getRandomDoubleQuantized(); - right_source_builder.break_prob = getRandomDoubleQuantized(); + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); ColumnInt64::Container expected; @@ -600,7 +620,7 @@ try auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { - generateNextKey(k1, k2); + generateNextKey(rng, k1, k2); Int64 left_t = 0; size_t num_left_rows = 
std::uniform_int_distribution<>(1, 100)(rng); @@ -656,12 +676,12 @@ catch (Exception & e) throw; } -TEST(FullSortingJoin, AsofGreaterGeneratedTestData) +TEST_F(FullSortingJoinRandomized, AsofGreaterGeneratedTestData) try { - auto join_kind = getRandomFrom({ JoinKind::Inner, JoinKind::Left }); + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); - auto asof_inequality = getRandomFrom({ ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); + auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); @@ -679,8 +699,8 @@ try {std::make_shared(), "attr"}, }); - left_source_builder.break_prob = getRandomDoubleQuantized(); - right_source_builder.break_prob = getRandomDoubleQuantized(); + left_source_builder.setBreakProbability(rng); + right_source_builder.setBreakProbability(rng); ColumnInt64::Container expected; @@ -691,7 +711,7 @@ try auto key_num_total = std::uniform_int_distribution<>(1, 100)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { - generateNextKey(k1, k2); + generateNextKey(rng, k1, k2); /// generate some rows with smaller left_t to check that they are not matched size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? 
std::uniform_int_distribution<>(1, 10)(rng) : 0; @@ -732,10 +752,6 @@ try /* key_length = */ 3, join_kind, JoinStrictness::Asof, asof_inequality)); - // std::cerr << "============ left ============" << std::endl << dumpBlockSource(left_source_builder.getSource()) << std::endl; - // std::cerr << "============ right ============" << std::endl << dumpBlockSource(right_source_builder.getSource()) << std::endl; - // std::cerr << "============ result ============" << std::endl << dumpBlock(result_block) << std::endl; - assertColumnVectorEq(expected, result_block, "t1.attr"); for (auto & e : expected) From 3e9090cbebb3cdb97f4dbb4b9d03c7e9d9e242b1 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 27 Sep 2023 11:14:37 +0000 Subject: [PATCH 12/57] fix --- .../sql-reference/statements/select/join.md | 5 ++- .../Transforms/MergeJoinTransform.cpp | 41 ++++++++++++------- .../Transforms/MergeJoinTransform.h | 23 +---------- .../tests/gtest_full_sorting_join.cpp | 36 +++++++++------- 4 files changed, 52 insertions(+), 53 deletions(-) diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 34c6016235a..96d9d26977d 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -297,7 +297,7 @@ Algorithm requires the special column in tables. This column: - Must contain an ordered sequence. - Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). -- Can’t be the only column in the `JOIN` clause. +- For `hash` join algorithm it can’t be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... 
ON`: @@ -337,7 +337,8 @@ For example, consider the following tables: `ASOF JOIN` can take the timestamp of a user event from `table_1` and find an event in `table_2` where the timestamp is closest to the timestamp of the event from `table_1` corresponding to the closest match condition. Equal timestamp values are the closest if available. Here, the `user_id` column can be used for joining on equality and the `ev_time` column can be used for joining on the closest match. In our example, `event_1_1` can be joined with `event_2_1` and `event_1_2` can be joined with `event_2_3`, but `event_2_2` can’t be joined. :::note -`ASOF` join is **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. +`ASOF JOIN` is supported only by `hash` and `full_sorting_merge` join algorithms. +It's **not** supported in the [Join](../../../engines/table-engines/special/join.md) table engine. ::: ## PASTE JOIN Usage diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index ad564557c36..0d9eab248d8 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -329,6 +329,26 @@ void AsofJoinState::reset() value.clear(); } +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof) + : sample_block(sample_block_.cloneEmpty()) + , desc(description_) +{ + if (desc.size() == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); + + if (is_asof) + { + /// For ASOF join prefix of sort description is used for equality comparison + /// and the last column is used for inequality comparison and is handled separately + + auto asof_column_description = desc.back(); + desc.pop_back(); + + chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1); + asof_column_position = 
sample_block.getPositionByName(asof_column_description.column_name); + } +} + const Chunk & FullMergeJoinCursor::getCurrent() const { return current_chunk; @@ -686,7 +706,6 @@ std::optional MergeJoinAlgorithm::handleAsofJoinStat return {}; auto & left_cursor = *cursors[0]; - size_t lpos = left_cursor->getRow(); const auto & left_columns = left_cursor.getCurrent().getColumns(); MutableColumns result_cols = getEmptyResultColumns(); @@ -695,7 +714,7 @@ std::optional MergeJoinAlgorithm::handleAsofJoinStat { size_t i = 0; for (const auto & col : left_columns) - result_cols[i++]->insertFrom(*col, lpos); + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); for (const auto & col : asof_join_state.value.getColumns()) result_cols[i++]->insertFrom(*col, asof_join_state.value_row); chassert(i == result_cols.size()); @@ -707,7 +726,7 @@ std::optional MergeJoinAlgorithm::handleAsofJoinStat /// return row with default values at right side size_t i = 0; for (const auto & col : left_columns) - result_cols[i++]->insertFrom(*col, lpos); + result_cols[i++]->insertFrom(*col, left_cursor->getRow()); for (; i < result_cols.size(); ++i) result_cols[i]->insertDefault(); chassert(i == result_cols.size()); @@ -977,12 +996,10 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() auto lpos = left_cursor->getRow(); auto rpos = right_cursor->getRow(); auto cmp = compareCursors(*left_cursor, *right_cursor); - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ({}) <=> ({}) -> {}", __FILE__, __LINE__, left_cursor.dump(), right_cursor.dump(), cmp); if (cmp == 0) { auto asof_cmp = compareAsofCursors(left_cursor, right_cursor); - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ({}) <=> ({}) -> asof {}", __FILE__, __LINE__, left_cursor.dump(), right_cursor.dump(), asof_cmp); if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) @@ -1010,7 +1027,6 @@ MergeJoinAlgorithm::Status 
MergeJoinAlgorithm::asofJoin() if ((asof_inequality == ASOFJoinInequality::Greater && asof_cmp >= 1) || (asof_inequality == ASOFJoinInequality::GreaterOrEquals && asof_cmp >= 0)) { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); /// condition is satisfied, remember this row and move next to try to find better match asof_join_state.set(right_cursor, rpos); right_cursor->next(); @@ -1022,7 +1038,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() /// Asof condition is not satisfied anymore, use last matched row from right table if (asof_join_state.hasMatch(left_cursor, asof_inequality)) { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); size_t i = 0; for (const auto & col : left_columns) result_cols[i++]->insertFrom(*col, lpos); @@ -1035,7 +1050,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() asof_join_state.reset(); if (isLeft(kind)) { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); /// return row with default values at right side size_t i = 0; @@ -1047,7 +1061,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() } } left_cursor->next(); - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); continue; } @@ -1055,10 +1068,8 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() } else if (cmp < 0) { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); if (asof_join_state.hasMatch(left_cursor, asof_inequality)) { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); size_t i = 0; for (const auto & col : left_columns) @@ -1071,13 +1082,12 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() } else { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); asof_join_state.reset(); } - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); /// no matches for rows in left table, just pass them through size_t num = nextDistinct(*left_cursor); + if 
(isLeft(kind) && num) { /// return them with default values at right side @@ -1091,7 +1101,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() } else { - // LOG_DEBUG(&Poco::Logger::get("XXXX"), "{}:{} ", __FILE__, __LINE__); /// skip rows in right table until we find match for current row in left table nextDistinct(*right_cursor); @@ -1106,6 +1115,7 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() /// otherwise - vice versa Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t start, size_t num_rows) const { + ColumnRawPtrs cols; { const auto & columns_left = source_num == 0 ? cursors[0]->getCurrent().getColumns() : cursors[0]->sampleColumns(); @@ -1128,7 +1138,6 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num, size_t star cols.push_back(col.get()); } } - Chunk result_chunk; copyColumnsResized(cols, start, num_rows, result_chunk); return result_chunk; @@ -1144,6 +1153,7 @@ Chunk MergeJoinAlgorithm::createBlockWithDefaults(size_t source_num) IMergingAlgorithm::Status MergeJoinAlgorithm::merge() { + if (!cursors[0]->cursor.isValid() && !cursors[0]->fullyCompleted()) return Status(0); @@ -1161,6 +1171,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) { + if (!cursors[0]->fullyCompleted() && isLeftOrFull(kind)) return Status(createBlockWithDefaults(0)); diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index dbdda0b166b..c8ba857781e 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -185,28 +185,7 @@ public: class FullMergeJoinCursor : boost::noncopyable { public: - FullMergeJoinCursor( - const Block & sample_block_, - const SortDescription & description_, - bool is_asof = false) - : sample_block(sample_block_.cloneEmpty()) - , desc(description_) - { - if (desc.size() == 0) - throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); - - if (is_asof) - { - /// For ASOF join prefix of sort description is used for equality comparison - /// and the last column is used for inequality comparison and is handled separately - - auto asof_column_description = desc.back(); - desc.pop_back(); - - chassert(asof_column_description.direction == 1 && asof_column_description.nulls_direction == 1); - asof_column_position = sample_block.getPositionByName(asof_column_description.column_name); - } - } + FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_, bool is_asof = false); bool fullyCompleted() const; void setChunk(Chunk && chunk); diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 2bd3357eff9..e3423aa0386 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -294,18 +295,25 @@ bool isStrict(ASOFJoinInequality inequality) } -class FullSortingJoinRandomized : public ::testing::Test +class FullSortingJoinTest : public ::testing::Test { public: - FullSortingJoinRandomized() = default; + FullSortingJoinTest() = default; void SetUp() override { + Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + Poco::Logger::root().setChannel(channel); + if (const char * test_log_level = std::getenv("TEST_LOG_LEVEL")) // NOLINT(concurrency-mt-unsafe) + Poco::Logger::root().setLevel(test_log_level); + else + Poco::Logger::root().setLevel("none"); + + UInt64 seed = randomSeed(); if (const char * random_seed = std::getenv("TEST_RANDOM_SEED")) // NOLINT(concurrency-mt-unsafe) seed = std::stoull(random_seed); - - std::cerr << "TEST_RANDOM_SEED=" << seed << std::endl; + std::cout << "TEST_RANDOM_SEED=" << seed << std::endl; rng = pcg64(seed); } @@ -316,7 +324,7 @@ public: pcg64 rng; }; -TEST(FullSortingJoin, 
AllAnyOneKey) +TEST_F(FullSortingJoinTest, AllAnyOneKey) try { { @@ -392,7 +400,7 @@ catch (Exception & e) } -TEST_F(FullSortingJoinRandomized, Any) +TEST_F(FullSortingJoinTest, AnySimple) try { JoinKind kind = getRandomFrom(rng, {JoinKind::Inner, JoinKind::Left, JoinKind::Right}); @@ -478,7 +486,7 @@ catch (Exception & e) throw; } -TEST(FullSortingJoin, Asof) +TEST_F(FullSortingJoinTest, AsofSimple) try { SourceChunksBuilder left_source({ @@ -543,7 +551,7 @@ catch (Exception & e) } -TEST_F(FullSortingJoinRandomized, AsofOnlyColumn) +TEST_F(FullSortingJoinTest, AsofOnlyColumn) try { auto left_source = oneColumnSource({ {3}, {3, 3, 3}, {3, 5, 5, 6}, {9, 9}, {10, 20} }); @@ -587,7 +595,7 @@ catch (Exception & e) throw; } -TEST_F(FullSortingJoinRandomized, AsofLessGeneratedTestData) +TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData) try { auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); @@ -676,7 +684,7 @@ catch (Exception & e) throw; } -TEST_F(FullSortingJoinRandomized, AsofGreaterGeneratedTestData) +TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData) try { auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); @@ -708,13 +716,13 @@ try String k2 = ""; UInt64 left_t = 0; - auto key_num_total = std::uniform_int_distribution<>(1, 100)(rng); + auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { generateNextKey(rng, k1, k2); /// generate some rows with smaller left_t to check that they are not matched - size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? std::uniform_int_distribution<>(1, 10)(rng) : 0; + size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? 
std::uniform_int_distribution<>(1, 100)(rng) : 0; for (size_t i = 0; i < num_left_rows; ++i) { left_t += std::uniform_int_distribution<>(1, 10)(rng); @@ -737,8 +745,8 @@ try } /// next left_t should be greater than (or equals) right_t to match with previous rows - left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 10)(rng); - size_t num_left_matches = std::uniform_int_distribution<>(1, 10)(rng); + left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng); + size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng); for (size_t j = 0; j < num_left_matches; ++j) { left_t += std::uniform_int_distribution<>(0, 3)(rng); From a8690947c705339113266c1ec8db05695a36e472 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 28 Sep 2023 12:50:24 +0000 Subject: [PATCH 13/57] enable stateless tests for full sorting asof join --- src/Interpreters/InterpreterSelectQuery.cpp | 7 ++- src/Planner/PlannerJoinTree.cpp | 6 ++- .../Transforms/MergeJoinTransform.cpp | 2 - .../00927_asof_join_correct_bt.reference | 15 +++++++ .../00927_asof_join_correct_bt.sql | 30 ++++++++----- .../00927_asof_join_long.reference | 1 + .../0_stateless/00927_asof_join_long.sql | 17 ++++++- .../00927_asof_join_noninclusive.reference | 29 ++++++++++++ .../00927_asof_join_noninclusive.sql | 5 ++- .../00927_asof_join_other_types.reference | 45 +++++++++++++++++++ .../00927_asof_join_other_types.sh | 27 ----------- .../00927_asof_join_other_types.sql.j2 | 27 +++++++++++ .../0_stateless/00927_asof_joins.reference | 15 +++++++ .../queries/0_stateless/00927_asof_joins.sql | 8 +++- .../0_stateless/00976_asof_join_on.reference | 38 ++++++++++++++++ ..._join_on.sql => 00976_asof_join_on.sql.j2} | 8 ++++ 16 files changed, 231 insertions(+), 49 deletions(-) delete mode 100755 tests/queries/0_stateless/00927_asof_join_other_types.sh create mode 100755 tests/queries/0_stateless/00927_asof_join_other_types.sql.j2 rename
tests/queries/0_stateless/{00976_asof_join_on.sql => 00976_asof_join_on.sql.j2} (90%) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b72399df2c1..a0b53ad42d1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1726,7 +1726,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(query_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(*joined_plan, join_clause.key_names_right, JoinTableSide::Right); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index aa46f65d2d4..63acf194139 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1484,7 +1484,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { const auto & join_clause = table_join->getOnlyClause(); - bool kind_allows_filtering = isInner(join_kind) || isLeft(join_kind) || isRight(join_kind); + bool join_type_allows_filtering = (join_strictness == JoinStrictness::All || join_strictness == JoinStrictness::Any) + && (isInner(join_kind) || isLeft(join_kind) || isRight(join_kind)); + auto has_non_const = [](const Block & block, const auto & keys) { @@ -1504,7 +1506,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left) && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right); - if (settings.max_rows_in_set_to_optimize_join > 0 && kind_allows_filtering && has_non_const_keys) + if (settings.max_rows_in_set_to_optimize_join > 0 && join_type_allows_filtering && has_non_const_keys) { auto * left_set = add_create_set(left_plan, join_clause.key_names_left, JoinTableSide::Left); auto * right_set = add_create_set(right_plan, join_clause.key_names_right, JoinTableSide::Right); diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 
0d9eab248d8..df56ffc2871 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -456,8 +456,6 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( size_t left_idx = input_headers[0].getPositionByName(left_key); size_t right_idx = input_headers[1].getPositionByName(right_key); left_to_right_key_remap[left_idx] = right_idx; - if (strictness == JoinStrictness::Asof) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ASOF joins USING"); } const auto *smjPtr = typeid_cast(table_join.get()); diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference index bb199d0159a..a398f9604fd 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -13,3 +13,18 @@ 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 +1 101 1 0 0 0 +1 102 2 2 102 1 +1 103 3 2 102 1 +1 104 4 4 104 1 +1 105 5 4 104 1 diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql index 281a81d51c0..761d6bacde6 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -4,20 +4,26 @@ DROP TABLE IF EXISTS B; CREATE TABLE A(k UInt32, t UInt32, a UInt64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO A(k,t,a) VALUES (1,101,1),(1,102,2),(1,103,3),(1,104,4),(1,105,5); -CREATE TABLE B(k UInt32, t UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B1(k UInt32, t UInt32, b UInt64) 
ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B1(k,t,b) VALUES (1,102,2), (1,104,4); +CREATE TABLE B2(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(t UInt32, k UInt32, b UInt64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4); -CREATE TABLE B(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (1,102,2), (1,104,4); -SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); -DROP TABLE B; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); + +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); + +DROP TABLE B1; +DROP TABLE B2; +DROP TABLE B3; DROP TABLE A; diff --git a/tests/queries/0_stateless/00927_asof_join_long.reference b/tests/queries/0_stateless/00927_asof_join_long.reference index d4f015c68e4..ec40d2bc463 100644 --- a/tests/queries/0_stateless/00927_asof_join_long.reference +++ b/tests/queries/0_stateless/00927_asof_join_long.reference @@ -1 +1,2 @@ 3000000 +3000000 diff --git a/tests/queries/0_stateless/00927_asof_join_long.sql b/tests/queries/0_stateless/00927_asof_join_long.sql index 
c03a06d48d4..7a73875e93e 100644 --- a/tests/queries/0_stateless/00927_asof_join_long.sql +++ b/tests/queries/0_stateless/00927_asof_join_long.sql @@ -2,15 +2,28 @@ DROP TABLE IF EXISTS tvs; +-- to use different algorithms for in subquery +SET allow_experimental_analyzer = 1; + CREATE TABLE tvs(k UInt32, t UInt32, tv UInt64) ENGINE = Memory; INSERT INTO tvs(k,t,tv) SELECT k, t, t FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys -CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times; +CROSS JOIN (SELECT toUInt32(number * 3) as t FROM numbers(10000)) tv_times +SETTINGS join_algorithm = 'hash'; SELECT SUM(trades.price - tvs.tv) FROM (SELECT k, t, t as price FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys - CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times) trades + CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times + SETTINGS join_algorithm = 'hash') trades ASOF LEFT JOIN tvs USING(k,t); +SELECT SUM(trades.price - tvs.tv) FROM +(SELECT k, t, t as price + FROM (SELECT toUInt32(number) AS k FROM numbers(1000)) keys + CROSS JOIN (SELECT toUInt32(number * 10) AS t FROM numbers(3000)) trade_times + SETTINGS join_algorithm = 'hash') trades +ASOF LEFT JOIN tvs USING(k,t) +SETTINGS join_algorithm = 'full_sorting_merge'; + DROP TABLE tvs; diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference index fe2844a2a43..d856372fb4a 100644 --- a/tests/queries/0_stateless/00927_asof_join_noninclusive.reference +++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.reference @@ -27,3 +27,32 @@ 2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +1 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 
00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +2 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +3 1970-01-01 00:00:01 1 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:02 2 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:03 3 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:04 4 0 1970-01-01 00:00:00 0 +3 1970-01-01 00:00:05 5 0 1970-01-01 00:00:00 0 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 +1 1970-01-01 00:00:02 2 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:03 3 2 1970-01-01 00:00:02 1 +1 1970-01-01 00:00:04 4 4 1970-01-01 00:00:04 1 +1 1970-01-01 00:00:05 5 4 1970-01-01 00:00:04 1 +2 1970-01-01 00:00:03 3 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:04 4 3 1970-01-01 00:00:03 2 +2 1970-01-01 00:00:05 5 3 1970-01-01 00:00:03 2 diff --git a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql index 5f15f3b593d..3cc99df4462 100644 --- a/tests/queries/0_stateless/00927_asof_join_noninclusive.sql +++ b/tests/queries/0_stateless/00927_asof_join_noninclusive.sql @@ -11,9 +11,12 @@ INSERT INTO B(k,t,b) VALUES (1,2,2),(1,4,4); INSERT INTO B(k,t,b) VALUES (2,3,3); SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); - SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t); +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 
'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t); +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (A.k, A.t); +SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF INNER JOIN B ON A.k == B.k AND A.t >= B.t ORDER BY (A.k, A.t); SELECT A.k, toString(A.t, 'UTC'), A.a, B.b, toString(B.t, 'UTC'), B.k FROM A ASOF JOIN B USING(k,t) ORDER BY (A.k, A.t); DROP TABLE A; diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.reference b/tests/queries/0_stateless/00927_asof_join_other_types.reference index 80c85ec1ae3..ddbc24ff925 100644 --- a/tests/queries/0_stateless/00927_asof_join_other_types.reference +++ b/tests/queries/0_stateless/00927_asof_join_other_types.reference @@ -1,27 +1,72 @@ +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- 2 1970-01-01 02:00:01 1 0 2 1970-01-01 02:00:03 3 3 2 1970-01-01 02:00:05 5 3 +- +2 1970-01-01 02:00:01 1 0 +2 1970-01-01 02:00:03 3 3 +2 1970-01-01 02:00:05 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- 2 1 1 0 2 3 3 3 2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1 1 0 +2 3 3 3 +2 5 5 3 +- +2 1970-01-01 02:00:00.001 1 0 +2 1970-01-01 02:00:00.003 3 3 +2 1970-01-01 02:00:00.005 5 3 +- 2 1970-01-01 02:00:00.001 1 0 2 1970-01-01 02:00:00.003 3 3 2 1970-01-01 02:00:00.005 5 3 diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sh b/tests/queries/0_stateless/00927_asof_join_other_types.sh deleted file mode 100755 index 10173a3e43f..00000000000 --- a/tests/queries/0_stateless/00927_asof_join_other_types.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -set -e - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck 
source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -for typename in "UInt32" "UInt64" "Float64" "Float32" "DateTime('Asia/Istanbul')" "Decimal32(5)" "Decimal64(5)" "Decimal128(5)" "DateTime64(3, 'Asia/Istanbul')" -do - $CLICKHOUSE_CLIENT -mn <= B.t ORDER BY (A.a, A.t); SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t; SELECT A.a, A.t, B.b, B.t FROM A ASOF INNER JOIN B ON B.t <= A.t AND A.a == B.b ORDER BY (A.a, A.t); @@ -28,5 +34,7 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A WHERE B.t != 3 ORDER BY (A.a, A.t) ; +{% endfor %} + DROP TABLE A; DROP TABLE B; From 256ad60115ddd1eb8c8a5597478e057926fe30f6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 28 Sep 2023 12:52:20 +0000 Subject: [PATCH 14/57] fix style, clang tidy --- src/Planner/PlannerJoinTree.cpp | 8 +-- .../Transforms/MergeJoinTransform.cpp | 59 +++++++++------- .../Transforms/MergeJoinTransform.h | 2 +- .../tests/gtest_full_sorting_join.cpp | 27 +------- .../00927_asof_join_other_types.sql.j2 | 0 .../01116_asof_join_dolbyzerr.reference | 3 + .../0_stateless/01116_asof_join_dolbyzerr.sql | 14 ++++ .../02240_asof_join_biginteger.reference | 4 ++ .../02240_asof_join_biginteger.sql | 8 +++ ...n_in_left_table_clause_asof_join.reference | 1 + ...unction_in_left_table_clause_asof_join.sql | 12 ++++ .../03143_asof_join_ddb_long.reference | 2 + .../0_stateless/03143_asof_join_ddb_long.sql | 48 +++++++++++++ .../03144_asof_join_ddb_doubles.reference | 58 ++++++++++++++++ .../03144_asof_join_ddb_doubles.sql | 64 +++++++++++++++++ ...03145_asof_join_ddb_inequalities.reference | 68 +++++++++++++++++++ .../03145_asof_join_ddb_inequalities.sql | 63 +++++++++++++++++ .../03146_asof_join_ddb_merge_long.reference | 2 + .../03146_asof_join_ddb_merge_long.sql.j2 | 37 ++++++++++ 19 files changed, 424 insertions(+), 56 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/00927_asof_join_other_types.sql.j2 create mode 100644 
tests/queries/0_stateless/03143_asof_join_ddb_long.reference create mode 100644 tests/queries/0_stateless/03143_asof_join_ddb_long.sql create mode 100644 tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference create mode 100644 tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql create mode 100644 tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference create mode 100644 tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql create mode 100644 tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference create mode 100644 tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 63acf194139..a9d9e11f458 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -75,7 +75,6 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int SYNTAX_ERROR; extern const int ACCESS_DENIED; extern const int PARAMETER_OUT_OF_BOUND; extern const int TOO_MANY_COLUMNS; @@ -1357,12 +1356,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ { if (!join_clause.hasASOF()) throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} no inequality in ASOF JOIN ON section.", - join_node.formatASTForErrorMessage()); - - if (table_join_clause.key_names_left.size() <= 1) - throw Exception(ErrorCodes::SYNTAX_ERROR, - "JOIN {} ASOF join needs at least one equi-join column", + "JOIN {} no inequality in ASOF JOIN ON section", join_node.formatASTForErrorMessage()); } diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index df56ffc2871..26e1ebb0b60 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -43,6 +43,13 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names 
& columns, return std::make_unique(materializeBlock(block), desc, strictness == JoinStrictness::Asof); } +bool isNullAt(const IColumn & column, size_t row) +{ + if (const auto * nullable = checkAndGetColumn(column)) + return nullable->isNullAt(row); + return false; +} + template int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, size_t lhs_pos, size_t rhs_pos, int null_direction_hint) { @@ -54,7 +61,7 @@ int nullableCompareAt(const IColumn & left_column, const IColumn & right_column, if (left_nullable && right_nullable) { int res = left_nullable->compareAt(lhs_pos, rhs_pos, right_column, null_direction_hint); - if (res) + if (res != 0) return res; /// NULL != NULL case @@ -108,9 +115,9 @@ int ALWAYS_INLINE compareCursors(const SortCursorImpl & lhs, const SortCursorImp return compareCursors(lhs, lhs.getRow(), rhs, rhs.getRow(), lhs.sort_columns_size, null_direction_hint); } -int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs) +int compareAsofCursors(const FullMergeJoinCursor & lhs, const FullMergeJoinCursor & rhs, int null_direction_hint) { - return nullableCompareAt(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow()); + return nullableCompareAt(*lhs.getAsofColumn(), *rhs.getAsofColumn(), lhs->getRow(), rhs->getRow(), null_direction_hint); } bool ALWAYS_INLINE totallyLess(SortCursorImpl & lhs, SortCursorImpl & rhs, int null_direction_hint) @@ -235,12 +242,6 @@ void inline addRange(PaddedPODArray & values, UInt64 start, UInt64 end) values.push_back(i); } -void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size_t num) -{ - for (size_t i = 0; i < num; ++i) - left_or_right_map.push_back(idx); -} - void inline addMany(PaddedPODArray & values, UInt64 value, size_t num) { values.resize_fill(values.size() + num, value); @@ -257,7 +258,7 @@ JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos) new_col->insertFrom(*col, pos); 
row.push_back(std::move(new_col)); } - if (auto asof_column = cursor.getAsofColumn()) + if (const auto * asof_column = cursor.getAsofColumn()) { auto new_col = asof_column->cloneEmpty(); new_col->insertFrom(*asof_column, pos); @@ -275,10 +276,10 @@ bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const if (row.empty()) return false; - assert(this->row.size() == cursor->sort_columns_size); for (size_t i = 0; i < cursor->sort_columns_size; ++i) { - int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction); + // int cmp = this->row[i]->compareAt(0, cursor->getRow(), *(cursor->sort_columns[i]), cursor->desc[i].nulls_direction); + int cmp = nullableCompareAt(*this->row[i], *cursor->sort_columns[i], 0, cursor->getRow(), cursor->desc[i].nulls_direction); if (cmp != 0) return false; } @@ -287,9 +288,16 @@ bool JoinKeyRow::equals(const FullMergeJoinCursor & cursor) const bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const { + chassert(this->row.size() == cursor->sort_columns_size + 1); if (!equals(cursor)) return false; - int cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *row.back(), 1); + + const auto & asof_row = row.back(); + if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow())) + return false; + + int cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1); + return (asof_inequality == ASOFJoinInequality::Less && cmp < 0) || (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0) || (asof_inequality == ASOFJoinInequality::Greater && cmp > 0) @@ -333,7 +341,7 @@ FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const Sort : sample_block(sample_block_.cloneEmpty()) , desc(description_) { - if (desc.size() == 0) + if (desc.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty sort description for FullMergeJoinCursor"); if (is_asof) @@ -393,7 +401,7 @@ 
String FullMergeJoinCursor::dump() const row_dump.push_back(val.dump()); } - if (auto * asof_column = getAsofColumn()) + if (const auto * asof_column = getAsofColumn()) { asof_column->get(cursor.getRow(), val); row_dump.push_back(val.dump()); @@ -436,9 +444,10 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeJoinAlgorithm does not support ON filter conditions"); cursors = { - createCursor(input_headers[0], on_clause_.key_names_left, strictness), - createCursor(input_headers[1], on_clause_.key_names_right, strictness), + createCursor(input_headers[0], on_clause_.key_names_left, strictness), + createCursor(input_headers[1], on_clause_.key_names_right, strictness), }; +} MergeJoinAlgorithm::MergeJoinAlgorithm( JoinPtr join_ptr, @@ -458,7 +467,7 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( left_to_right_key_remap[left_idx] = right_idx; } - const auto *smjPtr = typeid_cast(table_join.get()); + const auto *smjPtr = typeid_cast(join_ptr.get()); if (smjPtr) { null_direction_hint = smjPtr->getNullDirection(); @@ -993,11 +1002,18 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() { auto lpos = left_cursor->getRow(); auto rpos = right_cursor->getRow(); - auto cmp = compareCursors(*left_cursor, *right_cursor); + auto cmp = compareCursors(*left_cursor, *right_cursor, null_direction_hint); + if (cmp == 0) + { + if (isNullAt(*left_cursor.getAsofColumn(), lpos)) + cmp = -1; + if (isNullAt(*right_cursor.getAsofColumn(), rpos)) + cmp = 1; + } if (cmp == 0) { - auto asof_cmp = compareAsofCursors(left_cursor, right_cursor); + auto asof_cmp = compareAsofCursors(left_cursor, right_cursor, null_direction_hint); if ((asof_inequality == ASOFJoinInequality::Less && asof_cmp <= -1) || (asof_inequality == ASOFJoinInequality::LessOrEquals && asof_cmp <= 0)) @@ -1048,7 +1064,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() asof_join_state.reset(); if (isLeft(kind)) { - /// return row with default values at right side size_t i = 
0; for (const auto & col : left_columns) @@ -1068,7 +1083,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() { if (asof_join_state.hasMatch(left_cursor, asof_inequality)) { - size_t i = 0; for (const auto & col : left_columns) result_cols[i++]->insertFrom(*col, lpos); @@ -1099,7 +1113,6 @@ MergeJoinAlgorithm::Status MergeJoinAlgorithm::asofJoin() } else { - /// skip rows in right table until we find match for current row in left table nextDistinct(*right_cursor); } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index c8ba857781e..15bf13381b8 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -167,7 +167,7 @@ public: void set(const FullMergeJoinCursor & rcursor, size_t rpos); void reset(); - bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) + bool hasMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequality asof_inequality) const { if (value.empty()) return false; diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index e3423aa0386..a3fda006eb8 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -30,31 +32,6 @@ using namespace DB; namespace { -[[ maybe_unused ]] -String dumpBlockSource(std::shared_ptr source, bool mono_block = false) -{ - WriteBufferFromOwnString buf; - { - Block header = source->getPort().getHeader(); - QueryPipeline pipeline(source); - auto format = std::make_shared(buf, header, FormatSettings{}, mono_block); - pipeline.complete(std::move(format)); - - CompletedPipelineExecutor executor(pipeline); - executor.execute(); - } - return buf.str(); -} - -[[ maybe_unused ]] -String dumpBlock(const Block & block) -{ - Block header = block.cloneEmpty(); - Chunk 
data(block.getColumns(), block.rows()); - auto source = std::make_shared(header, std::move(data)); - return dumpBlockSource(std::move(source)); -} - QueryPipeline buildJoinPipeline( std::shared_ptr left_source, std::shared_ptr right_source, diff --git a/tests/queries/0_stateless/00927_asof_join_other_types.sql.j2 b/tests/queries/0_stateless/00927_asof_join_other_types.sql.j2 old mode 100755 new mode 100644 diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference index 1055a67ea5b..0aa1a85f19d 100644 --- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.reference @@ -1,3 +1,6 @@ v1 o1 ['s2','s1'] v1 o2 ['s4'] v2 o3 ['s5','s3'] +v1 o1 ['s2','s1'] +v1 o2 ['s4'] +v2 o3 ['s5','s3'] diff --git a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql index 8a94b6ddd24..652cb35cf2a 100644 --- a/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql +++ b/tests/queries/0_stateless/01116_asof_join_dolbyzerr.sql @@ -16,3 +16,17 @@ GROUP BY ORDER BY visitorId ASC, orderId ASC; + +SELECT + visitorId, + orderId, + groupUniqArray(sessionId) +FROM sessions +ASOF INNER JOIN orders ON (sessions.visitorId = orders.visitorId) AND (sessions.date <= orders.date) +GROUP BY + visitorId, + orderId +ORDER BY + visitorId ASC, + orderId ASC +SETTINGS join_algorithm = 'full_sorting_merge'; diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.reference b/tests/queries/0_stateless/02240_asof_join_biginteger.reference index cac55eec430..f7eb4d74375 100644 --- a/tests/queries/0_stateless/02240_asof_join_biginteger.reference +++ b/tests/queries/0_stateless/02240_asof_join_biginteger.reference @@ -2,3 +2,7 @@ 0 340282366920938463463374607431768211457 0 18446744073709551617 0 340282366920938463463374607431768211457 +0 18446744073709551617 +0 
340282366920938463463374607431768211457 +0 18446744073709551617 +0 340282366920938463463374607431768211457 diff --git a/tests/queries/0_stateless/02240_asof_join_biginteger.sql b/tests/queries/0_stateless/02240_asof_join_biginteger.sql index 6dc5b00f116..a5c1faae4ea 100644 --- a/tests/queries/0_stateless/02240_asof_join_biginteger.sql +++ b/tests/queries/0_stateless/02240_asof_join_biginteger.sql @@ -3,3 +3,11 @@ select * from (select 0 as k, toInt256('340282366920938463463374607431768211457' select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v); select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); + +SET join_algorithm = 'full_sorting_merge'; + +select * from (select 0 as k, toInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toInt128('18446744073709551616') as v) t2 using(k, v); +select * from (select 0 as k, toInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); + +select * from (select 0 as k, toUInt128('18446744073709551617') as v) t1 asof join (select 0 as k, toUInt128('18446744073709551616') as v) t2 using(k, v); +select * from (select 0 as k, toUInt256('340282366920938463463374607431768211457') as v) t1 asof join (select 0 as k, toUInt256('340282366920938463463374607431768211456') as v) t2 using(k, v); diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference @@ -1 +1,2 @@ 
1 +1 diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql index 13dfb5debe7..6aa70a379c1 100644 --- a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql @@ -6,3 +6,15 @@ ASOF LEFT JOIN ( select 1 as session_id, 4 as id ) as visitors ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id +; + +select count(*) +from ( + select 1 as id, [1, 2, 3] as arr +) as sessions +ASOF LEFT JOIN ( + select 1 as session_id, 4 as id +) as visitors +ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id +SETTINGS join_algorithm = 'full_sorting_merge' +; diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference new file mode 100644 index 00000000000..2850a8aba98 --- /dev/null +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -0,0 +1,2 @@ +49999983751397 10000032 +49999983751397 10000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql new file mode 100644 index 00000000000..c421702bb00 --- /dev/null +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -0,0 +1,48 @@ +-- Tags: long + +DROP TABLE IF EXISTS build; +DROP TABLE IF EXISTS skewed_probe; + +CREATE TABLE build ENGINE = MergeTree ORDER BY (key, begin) +AS + SELECT + toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, + number % 4 AS key, + number AS value + FROM numbers(0, 10000000); + +CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) +AS + SELECT + toDateTime('1990-04-21 13:00:01') + INTERVAL number MINUTE AS begin, + 0 AS key + FROM numbers(0, 5) + UNION ALL + SELECT + 
toDateTime('1990-05-21 13:00:01') + INTERVAL number MINUTE AS begin, + 1 AS key + FROM numbers(0, 10) + UNION ALL + SELECT + toDateTime('1990-06-21 13:00:01') + INTERVAL number MINUTE AS begin, + 2 AS key + FROM numbers(0, 20) + UNION ALL + SELECT + toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, + 3 AS key + FROM numbers(0, 10000000); + + +SELECT SUM(value), COUNT(*) +FROM skewed_probe +ASOF JOIN build +USING (key, begin) +; + +SELECT SUM(value), COUNT(*) +FROM skewed_probe +ASOF JOIN build +USING (key, begin) +SETTINGS join_algorithm = 'full_sorting_merge' +; diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference new file mode 100644 index 00000000000..f130f0a3f3b --- /dev/null +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.reference @@ -0,0 +1,58 @@ +1 0 +2 0 +3 1 +4 1 +5 1 +6 2 +7 2 +8 3 +9 3 +0 0 +1 0 +2 0 +3 1 +4 1 +5 1 +6 2 +7 2 +8 3 +9 3 +1 1 0 +1 2 0 +1 3 1 +1 4 1 +1 5 1 +1 6 2 +1 7 2 +1 8 3 +1 9 3 +2 0 10 +2 1 10 +2 2 10 +2 3 10 +2 4 10 +2 5 10 +2 6 10 +2 7 20 +2 8 20 +2 9 20 +1 0 0 +1 1 0 +1 2 0 +1 3 1 +1 4 1 +1 5 1 +1 6 2 +1 7 2 +1 8 3 +1 9 3 +2 0 10 +2 1 10 +2 2 10 +2 3 10 +2 4 10 +2 5 10 +2 6 10 +2 7 20 +2 8 20 +2 9 20 diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql new file mode 100644 index 00000000000..ef16ced3082 --- /dev/null +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql @@ -0,0 +1,64 @@ +SET join_algorithm = 'full_sorting_merge'; + +DROP TABLE IF EXISTS events0; + +CREATE TABLE events0 ( + begin Float64, + value Int32 +) ENGINE = MergeTree ORDER BY begin; + +INSERT INTO events0 VALUES (1.0, 0), (3.0, 1), (6.0, 2), (8.0, 3); + +SELECT p.ts, e.value +FROM + (SELECT number :: Float64 AS ts FROM numbers(10)) p +ASOF JOIN events0 e +ON p.ts >= e.begin +ORDER BY p.ts ASC; + +SELECT p.ts, e.value +FROM + (SELECT number :: Float64 AS 
ts FROM numbers(10)) p +ASOF LEFT JOIN events0 e +ON p.ts >= e.begin +ORDER BY p.ts ASC +-- SETTINGS join_use_nulls = 1 +; + +DROP TABLE IF EXISTS events0; + +DROP TABLE IF EXISTS events; +DROP TABLE IF EXISTS probes; + +CREATE TABLE events ( + key Int32, + begin Float64, + value Int32 +) ENGINE = MergeTree ORDER BY (key, begin); + +INSERT INTO events VALUES (1, 1.0, 0), (1, 3.0, 1), (1, 6.0, 2), (1, 8.0, 3), (2, 0.0, 10), (2, 7.0, 20), (2, 11.0, 30); + +CREATE TABLE probes ( + key Int32, + ts Float64 +) ENGINE = MergeTree ORDER BY (key, ts) AS +SELECT + key.number, + ts.number +FROM + numbers(1, 2) as key, + numbers(10) as ts +SETTINGS join_algorithm = 'hash'; + +SELECT p.key, p.ts, e.value +FROM probes p +ASOF JOIN events e +ON p.key = e.key AND p.ts >= e.begin +ORDER BY p.key, p.ts ASC; + +SELECT p.key, p.ts, e.value +FROM probes p +ASOF LEFT JOIN events e +ON p.key = e.key AND p.ts >= e.begin +ORDER BY p.key, p.ts ASC NULLS FIRST; + diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference new file mode 100644 index 00000000000..73c4f7dfe25 --- /dev/null +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference @@ -0,0 +1,68 @@ +2023-03-21 12:00:00 1970-01-01 00:00:00 -1 +2023-03-21 13:00:00 1970-01-01 00:00:00 -1 +2023-03-21 14:00:00 2023-03-21 13:00:00 0 +2023-03-21 15:00:00 2023-03-21 14:00:00 1 +2023-03-21 16:00:00 2023-03-21 15:00:00 2 +2023-03-21 17:00:00 2023-03-21 16:00:00 3 +2023-03-21 18:00:00 2023-03-21 16:00:00 3 +2023-03-21 19:00:00 2023-03-21 16:00:00 3 +2023-03-21 20:00:00 2023-03-21 16:00:00 3 +2023-03-21 21:00:00 2023-03-21 16:00:00 3 +2027-10-18 11:03:27 2023-03-21 16:00:00 3 +2023-03-21 12:00:00 1970-01-01 00:00:00 -1 +2023-03-21 13:00:00 1970-01-01 00:00:00 -1 +2023-03-21 14:00:00 2023-03-21 13:00:00 0 +2023-03-21 15:00:00 2023-03-21 14:00:00 1 +2023-03-21 16:00:00 2023-03-21 15:00:00 2 +2023-03-21 17:00:00 2023-03-21 16:00:00 3 
+2023-03-21 18:00:00 2023-03-21 16:00:00 3 +2023-03-21 19:00:00 2023-03-21 16:00:00 3 +2023-03-21 20:00:00 2023-03-21 16:00:00 3 +2023-03-21 21:00:00 2023-03-21 16:00:00 3 +2027-10-18 11:03:27 2023-03-21 16:00:00 3 +\N \N \N +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 13:00:00 0 +2023-03-21 14:00:00 2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2023-03-21 15:00:00 2 +2023-03-21 16:00:00 2023-03-21 16:00:00 3 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 2027-10-18 11:03:27 9 +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 13:00:00 0 +2023-03-21 14:00:00 2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2023-03-21 15:00:00 2 +2023-03-21 16:00:00 2023-03-21 16:00:00 3 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 2027-10-18 11:03:27 9 +\N \N \N +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 14:00:00 1 +2023-03-21 14:00:00 2023-03-21 15:00:00 2 +2023-03-21 15:00:00 2023-03-21 16:00:00 3 +2023-03-21 16:00:00 2027-10-18 11:03:27 9 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 +2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2023-03-21 12:00:00 2023-03-21 13:00:00 0 +2023-03-21 13:00:00 2023-03-21 14:00:00 1 +2023-03-21 14:00:00 2023-03-21 15:00:00 2 +2023-03-21 15:00:00 2023-03-21 16:00:00 3 +2023-03-21 16:00:00 2027-10-18 11:03:27 9 +2023-03-21 17:00:00 2027-10-18 11:03:27 9 +2023-03-21 18:00:00 2027-10-18 11:03:27 9 +2023-03-21 19:00:00 2027-10-18 11:03:27 9 +2023-03-21 20:00:00 2027-10-18 11:03:27 9 
+2023-03-21 21:00:00 2027-10-18 11:03:27 9 +2027-10-18 11:03:27 \N \N +\N \N \N diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql new file mode 100644 index 00000000000..69de17541c1 --- /dev/null +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; + +SET join_algorithm = 'full_sorting_merge'; + +CREATE TABLE events0 ( + begin Nullable(DateTime('UTC')), + value Int32 +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO events0 SELECT toDateTime('2023-03-21 13:00:00', 'UTC') + INTERVAL number HOUR, number FROM numbers(4); +INSERT INTO events0 VALUES (NULL, -10),('0000-01-01 00:00:00', -1), ('9999-12-31 23:59:59', 9); + +CREATE TABLE probe0 ( + begin Nullable(DateTime('UTC')) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO probe0 SELECT toDateTime('2023-03-21 12:00:00', 'UTC') + INTERVAl number HOUR FROM numbers(10); +INSERT INTO probe0 VALUES (NULL),('9999-12-31 23:59:59'); + +SET join_use_nulls = 1; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin > e.begin +ORDER BY p.begin ASC; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin > e.begin +ORDER BY p.begin ASC +; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin <= e.begin +ORDER BY p.begin ASC; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin <= e.begin +ORDER BY p.begin ASC; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF JOIN events0 e +ON p.begin < e.begin +ORDER BY p.begin ASC +; + +SELECT p.begin, e.begin, e.value +FROM probe0 p +ASOF LEFT JOIN events0 e +ON p.begin < e.begin +ORDER BY p.begin ASC; + + +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference 
b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference new file mode 100644 index 00000000000..ca481c7fff0 --- /dev/null +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.reference @@ -0,0 +1,2 @@ +26790 1488 +26790 1488 diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 new file mode 100644 index 00000000000..551bac0cc06 --- /dev/null +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 @@ -0,0 +1,37 @@ +SET allow_experimental_analyzer=1; + +SET session_timezone = 'UTC'; + +{% for join_algorithm in ['default', 'full_sorting_merge'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + +-- TODO: support enable for full_sorting_merge +-- SET join_use_nulls = 1; + +WITH build AS ( + SELECT + tk.number AS k, + toDateTime('2021-01-01 00:00:00') + INTERVAL i.number SECONDS AS t, + i.number % 37 AS v + FROM numbers(3000000) AS i + CROSS JOIN numbers(2) AS tk + SETTINGS join_algorithm = 'hash', join_use_nulls = 0 +), +probe AS ( + SELECT + tk.number AS k, + toDateTime('2021-01-01 00:00:30') + INTERVAL tt.number HOUR AS t + FROM numbers(2) AS tk + CROSS JOIN numbers(toUInt32((toDateTime('2021-02-01 00:00:30') - toDateTime('2021-01-01 00:00:30')) / 3600)) AS tt + SETTINGS join_algorithm = 'hash', join_use_nulls = 0 +) +SELECT + SUM(v) AS v, + COUNT(*) AS n +FROM probe +ASOF LEFT JOIN build +USING (k, t) +; + +{% endfor -%} From 29494d0bc6cc1b0f82a887a06f0643ea4880e681 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 1 May 2024 15:08:50 +0000 Subject: [PATCH 15/57] add asof_join_ddb tests --- .../03144_asof_join_ddb_doubles.sql | 1 + .../03145_asof_join_ddb_inequalities.sql | 1 + .../03147_asof_join_ddb_missing.reference | 10 + .../03147_asof_join_ddb_missing.sql | 186 ++++++++++++++++++ .../03148_asof_join_ddb_subquery.reference | 4 + .../03148_asof_join_ddb_subquery.sql | 29 +++ .../03149_asof_join_ddb_timestamps.reference | 56 ++++++ 
.../03149_asof_join_ddb_timestamps.sql | 95 +++++++++ 8 files changed, 382 insertions(+) create mode 100644 tests/queries/0_stateless/03147_asof_join_ddb_missing.reference create mode 100644 tests/queries/0_stateless/03147_asof_join_ddb_missing.sql create mode 100644 tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference create mode 100644 tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql create mode 100644 tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference create mode 100644 tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql diff --git a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql index ef16ced3082..87aece14628 100644 --- a/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql +++ b/tests/queries/0_stateless/03144_asof_join_ddb_doubles.sql @@ -1,4 +1,5 @@ SET join_algorithm = 'full_sorting_merge'; +SET allow_experimental_analyzer = 1; DROP TABLE IF EXISTS events0; diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql index 69de17541c1..ce4badbd597 100644 --- a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS events0; DROP TABLE IF EXISTS probe0; +SET allow_experimental_analyzer = 1; SET join_algorithm = 'full_sorting_merge'; CREATE TABLE events0 ( diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference new file mode 100644 index 00000000000..11eb84463f4 --- /dev/null +++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.reference @@ -0,0 +1,10 @@ +108 +108 27 +513 +1218 +3528 +14553 +121275 +1495503 +12462525 +1249625025 diff --git a/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql 
new file mode 100644 index 00000000000..95a5f8ab3ff --- /dev/null +++ b/tests/queries/0_stateless/03147_asof_join_ddb_missing.sql @@ -0,0 +1,186 @@ +SET allow_experimental_analyzer=1; + +SET session_timezone = 'UTC'; +SET joined_subquery_requires_alias = 0; +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; + +-- # 10 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # Coverage: Missing right side bin +WITH build AS ( + SELECT + k * 2 AS k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + intDiv(k, 2) AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v), COUNT(*) +FROM probe ASOF JOIN build USING (k, t); + +-- # 20 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(20), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 30 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(30), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 50 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS 
t, + number AS v + FROM numbers(50), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 100 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(100), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 100 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(100), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 1000 dates, 5 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(1000), (SELECT number AS k FROM numbers(5)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 1000 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(1000), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); + +-- # 10000 dates, 50 keys +WITH build AS ( + SELECT + k, + toDateTime('2001-01-01 00:00:00') + INTERVAL number MINUTE AS t, + number AS v + FROM numbers(10000), (SELECT number AS k FROM numbers(50)) + SETTINGS join_algorithm = 
'default' +), +probe AS ( + SELECT + k * 2 AS k, + t - INTERVAL 30 SECOND AS t + FROM build +) +SELECT SUM(v) +FROM probe ASOF JOIN build USING (k, t); diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference new file mode 100644 index 00000000000..387a4a8f249 --- /dev/null +++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.reference @@ -0,0 +1,4 @@ +1 1 +3 1 +6 1 +8 1 diff --git a/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql new file mode 100644 index 00000000000..2ddf0f09b1e --- /dev/null +++ b/tests/queries/0_stateless/03148_asof_join_ddb_subquery.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS events; +CREATE TABLE events (begin Float64, value Int32) ENGINE = MergeTree() ORDER BY begin; + +INSERT INTO events VALUES (1, 0), (3, 1), (6, 2), (8, 3); + +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; +SET joined_subquery_requires_alias = 0; + +SELECT + begin, + value IN ( + SELECT e1.value + FROM ( + SELECT * + FROM events e1 + WHERE e1.value = events.value + ) AS e1 + ASOF JOIN ( + SELECT number :: Float64 AS begin + FROM numbers(10) + WHERE number >= 1 AND number < 10 + ) + USING (begin) + ) +FROM events +ORDER BY begin ASC; + +DROP TABLE IF EXISTS events; diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference new file mode 100644 index 00000000000..7cfc85d23a5 --- /dev/null +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.reference @@ -0,0 +1,56 @@ +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 
16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +2023-03-21 12:00:00 \N +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +\N \N +2023-03-21 12:00:00 0 +2023-03-21 13:00:00 0 +2023-03-21 14:00:00 1 +2023-03-21 15:00:00 2 +2023-03-21 16:00:00 3 +2023-03-21 17:00:00 3 +2023-03-21 18:00:00 3 +2023-03-21 19:00:00 3 +2023-03-21 20:00:00 3 +2023-03-21 21:00:00 3 +2106-02-07 06:28:15 9 +\N 0 +2023-03-21 12:00:00 \N +2023-03-21 13:00:00 \N +2023-03-21 14:00:00 \N +2023-03-21 15:00:00 \N +2023-03-21 16:00:00 \N +2023-03-21 17:00:00 \N +2023-03-21 18:00:00 \N +2023-03-21 19:00:00 \N +2023-03-21 20:00:00 \N +2023-03-21 21:00:00 \N +2106-02-07 06:28:15 \N +\N \N diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql new file mode 100644 index 00000000000..ff4518a3775 --- /dev/null +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql @@ -0,0 +1,95 @@ +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; + +SET session_timezone = 'UTC'; +SET allow_experimental_analyzer = 1; +SET join_algorithm = 'full_sorting_merge'; +SET join_use_nulls = 1; + +CREATE TABLE events0 +ENGINE = MergeTree() +ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59')) +AS +SELECT + toNullable(toDateTime('2023-03-21 13:00:00') + INTERVAL number HOUR) AS begin, + number AS value +FROM numbers(4); + +INSERT INTO events0 VALUES (NULL, -1), (toDateTime('9999-12-31 23:59:59'), 9); + +CREATE TABLE probe0 +ENGINE = MergeTree() +ORDER BY COALESCE(begin, toDateTime('9999-12-31 23:59:59')) +AS +SELECT + toNullable(toDateTime('2023-03-21 12:00:00') + INTERVAL number HOUR) AS begin +FROM numbers(10); + +INSERT INTO probe0 VALUES (NULL), 
(toDateTime('9999-12-31 23:59:59')); + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF JOIN events0 e ON p.begin >= e.begin +ORDER BY p.begin ASC; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF JOIN events0 e USING (begin) +ORDER BY p.begin ASC +SETTINGS join_use_nulls = 0 +; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN events0 e ON p.begin >= e.begin +ORDER BY p.begin ASC; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN events0 e USING (begin) +ORDER BY p.begin ASC +SETTINGS join_use_nulls = 0 +; + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF RIGHT JOIN events0 e ON p.begin >= e.begin +ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED} + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF RIGHT JOIN events0 e USING (begin) +ORDER BY e.begin ASC; -- { serverError NOT_IMPLEMENTED} + + +SELECT + p.begin, + e.value +FROM + probe0 p + ASOF LEFT JOIN ( + SELECT * FROM events0 WHERE log(value + 5) > 10 + ) e ON p.begin >= e.begin +ORDER BY p.begin ASC; + + +DROP TABLE IF EXISTS events0; +DROP TABLE IF EXISTS probe0; From d37f03201aaac06b1a175ac0b177c17d945dddbb Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 May 2024 10:07:43 +0000 Subject: [PATCH 16/57] fix --- src/Processors/Transforms/MergeJoinTransform.cpp | 2 -- tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 26e1ebb0b60..9e6904f0613 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -1173,9 +1173,7 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (auto result = handleAllJoinState()) - { return std::move(*result); - } if (auto result = handleAsofJoinState()) return std::move(*result); diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 
b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 index 551bac0cc06..44c54ae2a39 100644 --- a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 @@ -1,3 +1,5 @@ +-- Tags: long + SET allow_experimental_analyzer=1; SET session_timezone = 'UTC'; From 558b73aba4f89db4a2b3e5ed0754749caddd5dfb Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 May 2024 14:20:40 +0000 Subject: [PATCH 17/57] t --- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index c421702bb00..a41b667e6c8 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -1,5 +1,7 @@ -- Tags: long +-- https://s3.amazonaws.com/clickhouse-test-reports/55051/07f288862c56b0a98379a07101062689b0460788/stateless_tests_flaky_check__asan_.html + DROP TABLE IF EXISTS build; DROP TABLE IF EXISTS skewed_probe; From f710a67fb2e72361801f0ae9a45ccbd07e2c7f30 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 22 May 2024 12:27:04 +0000 Subject: [PATCH 18/57] set timezone in 03143_asof_join_ddb_long --- src/Processors/Transforms/MergeJoinTransform.cpp | 4 ++-- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 9e6904f0613..38b63a856f6 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -45,8 +45,8 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, bool isNullAt(const IColumn & column, size_t row) { - if (const auto * nullable = checkAndGetColumn(column)) - return nullable->isNullAt(row); + if (const auto * 
nullable_column = checkAndGetColumn(&column)) + return nullable_column->isNullAt(row); return false; } diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index a41b667e6c8..17a67511030 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -1,10 +1,10 @@ -- Tags: long --- https://s3.amazonaws.com/clickhouse-test-reports/55051/07f288862c56b0a98379a07101062689b0460788/stateless_tests_flaky_check__asan_.html - DROP TABLE IF EXISTS build; DROP TABLE IF EXISTS skewed_probe; +SET session_timezone = 'UTC'; + CREATE TABLE build ENGINE = MergeTree ORDER BY (key, begin) AS SELECT From 11f4ed75fd18a853e70c94f10596d5b3b09a35dd Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 5 Jun 2024 12:26:05 +0200 Subject: [PATCH 19/57] fix build --- src/Processors/tests/gtest_full_sorting_join.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index a3fda006eb8..7294a1b381a 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -151,7 +151,6 @@ public: size_t rows = current_chunk.front()->size(); chunks.emplace_back(std::move(current_chunk), rows); current_chunk = header.cloneEmptyColumns(); - return; } std::shared_ptr getSource() @@ -403,7 +402,7 @@ try auto expected_right = ColumnString::create(); UInt64 k1 = 1; - String k2 = ""; + String k2; auto get_attr = [&](const String & side, size_t idx) -> String { @@ -601,7 +600,7 @@ try ColumnInt64::Container expected; UInt64 k1 = 1; - String k2 = ""; + String k2; auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { @@ -690,7 +689,7 @@ try ColumnInt64::Container expected; UInt64 k1 = 1; - String k2 = ""; + String k2; UInt64 left_t = 
0; auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); From 91598d10e9ab6f9d7054ed7e4204665e85b7636c Mon Sep 17 00:00:00 2001 From: Tobias Florek Date: Thu, 27 Jun 2024 09:29:06 +0200 Subject: [PATCH 20/57] document declarative ssh-keys authentication --- docs/en/operations/settings/settings-users.md | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 96477f777a9..ef1e58fd18e 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -22,6 +22,21 @@ Structure of the `users` section: + + + ssh-ed25519 + AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj + + + ecdsa-sha2-nistp256 + AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBNxeV2uN5UY6CUbCzTA1rXfYimKQA5ivNIqxdax4bcMXz4D0nSk2l5E1TkR5mG8EBWtmExSPbcEPJ8V7lyWWbA8= + + + ssh-rsa + AAAAB3NzaC1yc2EAAAADAQABAAABgQCpgqL1SHhPVBOTFlOm0pu+cYBbADzC2jL41sPMawYCJHDyHuq7t+htaVVh2fRgpAPmSEnLEC2d4BEIKMtPK3bfR8plJqVXlLt6Q8t4b1oUlnjb3VPA9P6iGcW7CV1FBkZQEVx8ckOfJ3F+kI5VsrRlEDgiecm/C1VPl0/9M2llW/mPUMaD65cM9nlZgM/hUeBrfxOEqM11gDYxEZm1aRSbZoY4dfdm3vzvpSQ6lrCrkjn3X2aSmaCLcOWJhfBWMovNDB8uiPuw54g3ioZ++qEQMlfxVsqXDGYhXCrsArOVuW/5RbReO79BvXqdssiYShfwo+GhQ0+aLWMIW/jgBkkqx/n7uKLzCMX7b2F+aebRYFh+/QXEj7SnihdVfr9ud6NN3MWzZ1ltfIczlEcFLrLJ1Yq57wW6wXtviWh59WvTWFiPejGjeSjjJyqqB49tKdFVFuBnIU5u/bch2DXVgiAEdQwUrIp1ACoYPq22HFFAYUJrL32y7RxX3PGzuAv3LOc= + + + 0|1 @@ -79,6 +94,24 @@ Password can be specified in plaintext or in SHA256 (hex format). The first line of the result is the password. The second line is the corresponding double SHA1 hash. +### username/ssh-key {#user-sshkey} + +This setting allows authenticating with SSH keys. 
+ +Given a SSH key (as generated by `ssh-keygen`) like +``` +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj john@example.com +``` +The `ssh_key` element is expected to be +``` + + ssh-ed25519 + AAAAC3NzaC1lZDI1NTE5AAAAIDNf0r6vRl24Ix3tv2IgPmNPO2ATa2krvt80DdcTatLj + +``` + +Substitute `ssh-ed25519` with `ssh-rsa` or `ecdsa-sha2-nistp256` for the other supported algorithms. + ### access_management {#access_management-user-setting} This setting enables or disables using of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user. From 85baa91ba4774c449ef72d7f42278397598205b9 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 1 Jul 2024 15:26:19 +0200 Subject: [PATCH 21/57] Added spell exception --- .../aspell-ignore/en/aspell-dict.txt | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2bd949f102d..d100b1bc2d9 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -48,7 +48,6 @@ AutoML Autocompletion AvroConfluent BIGINT -bigrams BIGSERIAL BORO BSON @@ -223,7 +222,6 @@ DatabaseOrdinaryThreadsActive DateTime DateTimes DbCL -deallocated Decrypted Deduplicate Deduplication @@ -295,7 +293,6 @@ FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes FixedString FlameGraph -flameGraph Flink ForEach FreeBSD @@ -1009,7 +1006,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unigrams Updatable Uppercased Uptime @@ -1221,6 +1217,7 @@ basename bcrypt benchmarking bfloat +bigrams binlog bitAnd bitCount @@ -1470,6 +1467,7 @@ dbeaver dbgen dbms ddl +deallocated deallocation deallocations debian @@ -1509,11 +1507,11 @@ deserializing destructor destructors detectCharset -detectTonality detectLanguage detectLanguageMixed 
detectLanguageUnknown detectProgrammingLanguage +detectTonality determinator deterministically dictGet @@ -1529,8 +1527,8 @@ dictIsIn disableProtocols disjunction disjunctions -displaySecretsInShowAndSelect displayName +displaySecretsInShowAndSelect distro divideDecimal dmesg @@ -1580,11 +1578,11 @@ evalMLMethod exFAT expiryMsec exponentialMovingAverage -exponentialmovingaverage exponentialTimeDecayedAvg exponentialTimeDecayedCount exponentialTimeDecayedMax exponentialTimeDecayedSum +exponentialmovingaverage expr exprN extendedVerification @@ -1621,6 +1619,7 @@ firstSignificantSubdomainCustom firstSignificantSubdomainCustomRFC firstSignificantSubdomainRFC fixedstring +flameGraph flamegraph flatbuffers flattenTuple @@ -1803,8 +1802,8 @@ incrementing indexHint indexOf infi -infty inflight +infty initcap initcapUTF initialQueryID @@ -1952,9 +1951,9 @@ loghouse london lookups loongarch -lowcardinality lowCardinalityIndices lowCardinalityKeys +lowcardinality lowerUTF lowercased lttb @@ -2262,9 +2261,9 @@ proleptic prometheus proportionsZTest proto -protocol protobuf protobufsingle +protocol proxied pseudorandom pseudorandomize @@ -2516,6 +2515,7 @@ sqlite sqrt src srcReplicas +sshkey stacktrace stacktraces startsWith @@ -2808,6 +2808,7 @@ unescaping unhex unicode unidimensional +unigrams unintuitive uniq uniqCombined From a84dd6b7710da54e89722318755d4e4e70984e2b Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 11 Jul 2024 10:37:00 +0000 Subject: [PATCH 22/57] Fix some review comments --- .../Transforms/MergeJoinTransform.cpp | 2 +- .../tests/gtest_full_sorting_join.cpp | 48 +++++++++++++------ .../00927_asof_join_correct_bt.reference | 8 ++++ .../00927_asof_join_correct_bt.sql | 3 ++ .../02276_full_sort_join_unsupported.sql | 2 - ...03145_asof_join_ddb_inequalities.reference | 5 ++ .../03145_asof_join_ddb_inequalities.sql | 12 +++-- .../03146_asof_join_ddb_merge_long.sql.j2 | 2 +- .../03149_asof_join_ddb_timestamps.sql | 2 +- 9 files changed, 59 insertions(+), 25 
deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 38a59cd6d9a..3b69ddaec06 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -43,7 +43,7 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns, return std::make_unique(block, desc, strictness == JoinStrictness::Asof); } -bool isNullAt(const IColumn & column, size_t row) +bool ALWAYS_INLINE isNullAt(const IColumn & column, size_t row) { if (const auto * nullable_column = checkAndGetColumn(&column)) return nullable_column->isNullAt(row); diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index 7294a1b381a..f678d7984e8 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -484,7 +484,7 @@ try right_source.addRow({"AAPL", 2, 98}); right_source.addRow({"AAPL", 3, 99}); right_source.addRow({"AMZN", 1, 100}); - right_source.addRow({"AMZN", 2, 0}); + right_source.addRow({"AMZN", 2, 110}); right_source.addChunk(); right_source.addRow({"AMZN", 2, 110}); right_source.addChunk(); @@ -574,12 +574,15 @@ catch (Exception & e) TEST_F(FullSortingJoinTest, AsofLessGeneratedTestData) try { - auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + /// Generate data random and build expected result at the same time. 
+ /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Less, ASOFJoinInequality::LessOrEquals }); SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); + /// Key is complex, `k1, k2` for equality and `t` for asof SourceChunksBuilder left_source_builder({ {std::make_shared(), "k1"}, {std::make_shared(), "k2"}, @@ -594,9 +597,11 @@ try {std::make_shared(), "attr"}, }); + /// How small generated block should be left_source_builder.setBreakProbability(rng); right_source_builder.setBreakProbability(rng); + /// We are going to generate sorted data and remember expected result ColumnInt64::Container expected; UInt64 k1 = 1; @@ -604,29 +609,34 @@ try auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { + /// Generate new key greater than previous generateNextKey(rng, k1, k2); Int64 left_t = 0; + /// Generate several rows for the key size_t num_left_rows = std::uniform_int_distribution<>(1, 100)(rng); for (size_t i = 0; i < num_left_rows; ++i) { + /// t is strictly greater than previous left_t += std::uniform_int_distribution<>(1, 10)(rng); - left_source_builder.addRow({k1, k2, left_t, 10 * left_t}); - expected.push_back(10 * left_t); + auto left_arrtibute_value = 10 * left_t; + left_source_builder.addRow({k1, k2, left_t, left_arrtibute_value}); + expected.push_back(left_arrtibute_value); auto num_matches = 1 + std::poisson_distribution<>(4)(rng); - + /// Generate several matches in the right table auto right_t = left_t; for (size_t j = 0; j < num_matches; ++j) { int min_step = isStrict(asof_inequality) ? 1 : 0; right_t += std::uniform_int_distribution<>(min_step, 3)(rng); + /// First row should match bool is_match = j == 0; - right_source_builder.addRow({k1, k2, right_t, is_match ? 
100 * left_t : -1}); + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * left_arrtibute_value : -1}); } - /// next left_t should be greater than right_t not to match with previous rows + /// Next left_t should be greater than right_t not to match with previous rows left_t = right_t; } @@ -650,7 +660,9 @@ try assertColumnVectorEq(expected, result_block, "t1.attr"); for (auto & e : expected) - e = e < 0 ? 0 : 10 * e; /// non matched rows from left table have negative attr + /// Non matched rows from left table have negative attr + /// Value of attribute in right table is 10 times greater than in left table + e = e < 0 ? 0 : 10 * e; assertColumnVectorEq(expected, result_block, "t2.attr"); } @@ -663,8 +675,10 @@ catch (Exception & e) TEST_F(FullSortingJoinTest, AsofGreaterGeneratedTestData) try { - auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); + /// Generate random data and build expected result at the same time. + /// Test specific combinations of join kind and inequality per each run + auto join_kind = getRandomFrom(rng, { JoinKind::Inner, JoinKind::Left }); auto asof_inequality = getRandomFrom(rng, { ASOFJoinInequality::Greater, ASOFJoinInequality::GreaterOrEquals }); SCOPED_TRACE(fmt::format("{} {}", join_kind, asof_inequality)); @@ -695,9 +709,10 @@ try auto key_num_total = std::uniform_int_distribution<>(1, 1000)(rng); for (size_t key_num = 0; key_num < key_num_total; ++key_num) { + /// Generate new key greater than previous generateNextKey(rng, k1, k2); - /// generate some rows with smaller left_t to check that they are not matched + /// Generate some rows with smaller left_t to check that they are not matched size_t num_left_rows = std::bernoulli_distribution(0.5)(rng) ? 
std::uniform_int_distribution<>(1, 100)(rng) : 0; for (size_t i = 0; i < num_left_rows; ++i) { @@ -713,21 +728,22 @@ try size_t num_right_matches = std::uniform_int_distribution<>(1, 10)(rng); auto right_t = left_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 0 : 1, 10)(rng); + auto attribute_value = 10 * right_t; for (size_t j = 0; j < num_right_matches; ++j) { right_t += std::uniform_int_distribution<>(0, 3)(rng); bool is_match = j == num_right_matches - 1; - right_source_builder.addRow({k1, k2, right_t, is_match ? 100 * right_t : -1}); + right_source_builder.addRow({k1, k2, right_t, is_match ? 10 * attribute_value : -1}); } - /// next left_t should be greater than (or equals) right_t to match with previous rows + /// Next left_t should be greater than (or equal to) right_t to match with previous rows left_t = right_t + std::uniform_int_distribution<>(isStrict(asof_inequality) ? 1 : 0, 100)(rng); size_t num_left_matches = std::uniform_int_distribution<>(1, 100)(rng); for (size_t j = 0; j < num_left_matches; ++j) { left_t += std::uniform_int_distribution<>(0, 3)(rng); - left_source_builder.addRow({k1, k2, left_t, 10 * right_t}); - expected.push_back(10 * right_t); + left_source_builder.addRow({k1, k2, left_t, attribute_value}); + expected.push_back(attribute_value); } } @@ -739,7 +755,9 @@ try assertColumnVectorEq(expected, result_block, "t1.attr"); for (auto & e : expected) - e = e < 0 ? 0 : 10 * e; /// non matched rows from left table have negative attr + /// Non matched rows from left table have negative attr + /// Value of attribute in right table is 10 times greater than in left table + e = e < 0 ? 
0 : 10 * e; assertColumnVectorEq(expected, result_block, "t2.attr"); } diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference index a398f9604fd..28c48d2e290 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.reference +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.reference @@ -1,28 +1,36 @@ +-- { echoOn } +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SET join_algorithm = 'full_sorting_merge'; +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 1 104 4 4 104 1 1 105 5 4 104 1 +SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); 1 101 1 0 0 0 1 102 2 2 102 1 1 103 3 2 102 1 diff --git a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql index 761d6bacde6..d796b62d3b3 100644 --- a/tests/queries/0_stateless/00927_asof_join_correct_bt.sql +++ b/tests/queries/0_stateless/00927_asof_join_correct_bt.sql @@ -13,6 +13,7 @@ INSERT INTO B2(k,t,b) VALUES (1,102,2), (1,104,4); CREATE TABLE B3(k UInt32, b UInt64, t UInt32) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO B3(k,t,b) VALUES (1,102,2), (1,104,4); +-- { echoOn } SELECT 
A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER BY (A.k, A.t); SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); @@ -22,6 +23,8 @@ SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B1 B USING(k,t) ORDER SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B2 B USING(k,t) ORDER BY (A.k, A.t); SELECT A.k, A.t, A.a, B.b, B.t, B.k FROM A ASOF LEFT JOIN B3 B USING(k,t) ORDER BY (A.k, A.t); +-- { echoOff } + DROP TABLE B1; DROP TABLE B2; DROP TABLE B3; diff --git a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql index 03936107563..0b10101d8f2 100644 --- a/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql +++ b/tests/queries/0_stateless/02276_full_sort_join_unsupported.sql @@ -19,8 +19,6 @@ SELECT * FROM t1 ANTI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENT SELECT * FROM t1 SEMI JOIN t2 ON t1.key = t2.key; -- { serverError NOT_IMPLEMENTED } --- SELECT * FROM t1 ASOF JOIN t2 ON t1.key = t2.key AND t1.val > t2.val; -- { serverError NOT_IMPLEMENTED } - SELECT * FROM t1 ANY JOIN t2 ON t1.key = t2.key SETTINGS any_join_distinct_right_table_keys = 1; -- { serverError NOT_IMPLEMENTED } SELECT * FROM t1 JOIN t2 USING (key) SETTINGS join_use_nulls = 1; -- { serverError NOT_IMPLEMENTED } diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference index 73c4f7dfe25..4aac918c98c 100644 --- a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.reference @@ -1,3 +1,4 @@ +- 2023-03-21 12:00:00 1970-01-01 00:00:00 -1 2023-03-21 13:00:00 1970-01-01 00:00:00 -1 2023-03-21 14:00:00 2023-03-21 13:00:00 0 @@ -9,6 +10,7 @@ 2023-03-21 20:00:00 
2023-03-21 16:00:00 3 2023-03-21 21:00:00 2023-03-21 16:00:00 3 2027-10-18 11:03:27 2023-03-21 16:00:00 3 +- 2023-03-21 12:00:00 1970-01-01 00:00:00 -1 2023-03-21 13:00:00 1970-01-01 00:00:00 -1 2023-03-21 14:00:00 2023-03-21 13:00:00 0 @@ -32,6 +34,7 @@ 2023-03-21 20:00:00 2027-10-18 11:03:27 9 2023-03-21 21:00:00 2027-10-18 11:03:27 9 2027-10-18 11:03:27 2027-10-18 11:03:27 9 +- 2023-03-21 12:00:00 2023-03-21 13:00:00 0 2023-03-21 13:00:00 2023-03-21 13:00:00 0 2023-03-21 14:00:00 2023-03-21 14:00:00 1 @@ -44,6 +47,7 @@ 2023-03-21 21:00:00 2027-10-18 11:03:27 9 2027-10-18 11:03:27 2027-10-18 11:03:27 9 \N \N \N +- 2023-03-21 12:00:00 2023-03-21 13:00:00 0 2023-03-21 13:00:00 2023-03-21 14:00:00 1 2023-03-21 14:00:00 2023-03-21 15:00:00 2 @@ -54,6 +58,7 @@ 2023-03-21 19:00:00 2027-10-18 11:03:27 9 2023-03-21 20:00:00 2027-10-18 11:03:27 9 2023-03-21 21:00:00 2027-10-18 11:03:27 9 +- 2023-03-21 12:00:00 2023-03-21 13:00:00 0 2023-03-21 13:00:00 2023-03-21 14:00:00 1 2023-03-21 14:00:00 2023-03-21 15:00:00 2 diff --git a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql index ce4badbd597..d67aa254bd6 100644 --- a/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql +++ b/tests/queries/0_stateless/03145_asof_join_ddb_inequalities.sql @@ -21,18 +21,19 @@ INSERT INTO probe0 VALUES (NULL),('9999-12-31 23:59:59'); SET join_use_nulls = 1; +SELECT '-'; SELECT p.begin, e.begin, e.value FROM probe0 p ASOF JOIN events0 e ON p.begin > e.begin ORDER BY p.begin ASC; +SELECT '-'; SELECT p.begin, e.begin, e.value FROM probe0 p ASOF LEFT JOIN events0 e ON p.begin > e.begin -ORDER BY p.begin ASC -; +ORDER BY p.begin ASC; SELECT p.begin, e.begin, e.value FROM probe0 p @@ -40,25 +41,26 @@ ASOF JOIN events0 e ON p.begin <= e.begin ORDER BY p.begin ASC; +SELECT '-'; SELECT p.begin, e.begin, e.value FROM probe0 p ASOF LEFT JOIN events0 e ON p.begin <= e.begin ORDER BY p.begin ASC; +SELECT '-'; SELECT 
p.begin, e.begin, e.value FROM probe0 p ASOF JOIN events0 e ON p.begin < e.begin -ORDER BY p.begin ASC -; +ORDER BY p.begin ASC; +SELECT '-'; SELECT p.begin, e.begin, e.value FROM probe0 p ASOF LEFT JOIN events0 e ON p.begin < e.begin ORDER BY p.begin ASC; - DROP TABLE IF EXISTS events0; DROP TABLE IF EXISTS probe0; diff --git a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 index 44c54ae2a39..49ba70c471e 100644 --- a/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 +++ b/tests/queries/0_stateless/03146_asof_join_ddb_merge_long.sql.j2 @@ -8,7 +8,7 @@ SET session_timezone = 'UTC'; SET join_algorithm = '{{ join_algorithm }}'; --- TODO: support enable for full_sorting_merge +-- TODO: enable once USING and `join_use_nulls` is supported by `full_sorting_merge` -- SET join_use_nulls = 1; WITH build AS ( diff --git a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql index ff4518a3775..cd83d62dc70 100644 --- a/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql +++ b/tests/queries/0_stateless/03149_asof_join_ddb_timestamps.sql @@ -87,7 +87,7 @@ FROM probe0 p ASOF LEFT JOIN ( SELECT * FROM events0 WHERE log(value + 5) > 10 - ) e ON p.begin >= e.begin + ) e ON p.begin + INTERVAL 2 HOUR >= e.begin + INTERVAL 1 HOUR ORDER BY p.begin ASC; From ff7f5fe80873aad2f0f7b6f4e1e73c0178a69503 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 1 May 2024 03:09:13 +0200 Subject: [PATCH 23/57] Move view targets to separate AST class "ASTViewTargets" in order to allow extending it to support more kinds of view targets. 
--- src/Backups/BackupUtils.cpp | 2 +- src/Backups/RestoreCoordinationLocal.cpp | 6 +- src/Backups/RestoreCoordinationLocal.h | 9 +- src/Backups/RestoreCoordinationRemote.cpp | 9 +- src/Databases/DDLDependencyVisitor.cpp | 19 +- src/Databases/DDLRenamingVisitor.cpp | 17 +- src/Databases/DatabaseReplicated.cpp | 160 +++++----- src/Databases/DatabaseReplicated.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 126 ++++++-- .../InterpreterShowCreateQuery.cpp | 3 +- src/Parsers/ASTCreateQuery.cpp | 95 +++--- src/Parsers/ASTCreateQuery.h | 40 ++- src/Parsers/ASTViewTargets.cpp | 300 ++++++++++++++++++ src/Parsers/ASTViewTargets.h | 102 ++++++ src/Parsers/CreateQueryUUIDs.cpp | 168 ++++++++++ src/Parsers/CreateQueryUUIDs.h | 40 +++ src/Parsers/ParserCreateQuery.cpp | 92 ++++-- src/Parsers/ParserViewTargets.cpp | 88 +++++ src/Parsers/ParserViewTargets.h | 24 ++ src/Storages/StorageMaterializedView.cpp | 40 ++- src/Storages/System/StorageSystemTables.cpp | 3 +- src/Storages/WindowView/StorageWindowView.cpp | 15 +- 22 files changed, 1116 insertions(+), 243 deletions(-) create mode 100644 src/Parsers/ASTViewTargets.cpp create mode 100644 src/Parsers/ASTViewTargets.h create mode 100644 src/Parsers/CreateQueryUUIDs.cpp create mode 100644 src/Parsers/CreateQueryUUIDs.h create mode 100644 src/Parsers/ParserViewTargets.cpp create mode 100644 src/Parsers/ParserViewTargets.h diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index fa8ed5855dd..cd3f963b15d 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -105,7 +105,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS auto new_query = query.clone(); adjustCreateQueryForBackup(new_query, global_context); ASTCreateQuery & create = typeid_cast(*new_query); - create.setUUID({}); + create.resetUUIDs(); create.if_not_exists = false; return new_query; }; diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index 
f51d6c0c1d8..9fe22f874b4 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -67,7 +68,7 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer auto it = create_query_uuids.find(query_str); if (it != create_query_uuids.end()) { - create_query.setUUID(it->second); + it->second.copyToQuery(create_query); return true; } return false; @@ -79,7 +80,8 @@ void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_quer return; } - auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true); + CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true}; + new_uuids.copyToQuery(create_query); { std::lock_guard lock{mutex}; diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index 5e51b719d63..35f93574b68 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -1,16 +1,17 @@ #pragma once #include -#include +#include +#include #include #include #include -namespace Poco { class Logger; } - namespace DB { +class ASTCreateQuery; + /// Implementation of the IRestoreCoordination interface performing coordination in memory. 
class RestoreCoordinationLocal : public IRestoreCoordination @@ -55,7 +56,7 @@ private: std::set> acquired_tables_in_replicated_databases; std::unordered_set acquired_data_in_replicated_tables; - std::unordered_map create_query_uuids; + std::unordered_map create_query_uuids; std::unordered_set acquired_data_in_keeper_map_tables; mutable std::mutex mutex; diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 84106737fc9..44214d00be5 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -269,7 +270,8 @@ bool RestoreCoordinationRemote::acquireInsertingDataForKeeperMap(const String & void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query) { String query_str = serializeAST(create_query); - String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString(); + CreateQueryUUIDs new_uuids{create_query, /* generate_random= */ true, /* force_random= */ true}; + String new_uuids_str = new_uuids.toString(); auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable"); holder.retries_ctl.retryLoop( @@ -281,11 +283,14 @@ void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_que Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent); if (res == Coordination::Error::ZOK) + { + new_uuids.copyToQuery(create_query); return; + } if (res == Coordination::Error::ZNODEEXISTS) { - create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path))); + CreateQueryUUIDs::fromString(zk->get(path)).copyToQuery(create_query); return; } diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index d81dc7a76d8..d149b49d465 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -80,13 +80,20 @@ 
namespace /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. void visitCreateQuery(const ASTCreateQuery & create) { - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty()) + if (create.targets) { - /// TO target_table (for materialized views) - if (to_table.database.empty()) - to_table.database = current_database; - dependencies.emplace(to_table); + for (const auto & target : create.targets->targets) + { + const auto & table_id = target.table_id; + if (!table_id.table_name.empty()) + { + /// TO target_table (for materialized views) + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + if (target_name.database.empty()) + target_name.database = current_database; + dependencies.emplace(target_name); + } + } } QualifiedTableName as_table{create.as_database, create.as_table}; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 6cd414635a0..38e100e2470 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -86,12 +86,19 @@ namespace create.as_table = as_table_new.table; } - QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; - if (!to_table.table.empty() && !to_table.database.empty()) + if (create.targets) { - auto to_table_new = data.renaming_map.getNewTableName(to_table); - if (to_table_new != to_table) - create.to_table_id = StorageID{to_table_new.database, to_table_new.table}; + for (auto & target : create.targets->targets) + { + auto & table_id = target.table_id; + if (!table_id.database_name.empty() && !table_id.table_name.empty()) + { + QualifiedTableName target_name{table_id.database_name, table_id.table_name}; + auto new_target_name = data.renaming_map.getNewTableName(target_name); + if (new_target_name != target_name) + table_id = StorageID{new_target_name.database, 
new_target_name.table}; + } + } } } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 7ce2859e962..25d1ad90a3c 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -729,81 +729,14 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ if (auto * create = query->as()) { - bool replicated_table = create->storage && create->storage->engine && - (startsWith(create->storage->engine->name, "Replicated") || startsWith(create->storage->engine->name, "Shared")); - if (!replicated_table || !create->storage->engine->arguments) - return; + if (create->storage) + checkTableEngine(*create, *create->storage, query_context); - ASTs & args_ref = create->storage->engine->arguments->children; - ASTs args = args_ref; - if (args.size() < 2) - return; - - /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. - bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); - if (has_expression_argument) + if (create->targets) { - try - { - args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); - args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); - } - catch (...) // NOLINT(bugprone-empty-catch) - { - } + for (auto inner_table_engine : create->targets->getInnerEngines()) + checkTableEngine(*create, *inner_table_engine, query_context); } - - ASTLiteral * arg1 = args[0]->as(); - ASTLiteral * arg2 = args[1]->as(); - if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) - return; - - String maybe_path = arg1->value.get(); - String maybe_replica = arg2->value.get(); - - /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. - /// Let's ensure that some macros are used. 
- /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. - - Macros::MacroExpansionInfo info; - info.table_id = {getDatabaseName(), create->getTable(), create->uuid}; - info.shard = getShardName(); - info.replica = getReplicaName(); - query_context->getMacros()->expand(maybe_path, info); - bool maybe_shard_macros = info.expanded_other; - info.expanded_other = false; - query_context->getMacros()->expand(maybe_replica, info); - bool maybe_replica_macros = info.expanded_other; - bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); - - if (!enable_functional_tests_helper) - { - if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, - "It's not allowed to specify explicit zookeeper_path and replica_name " - "for ReplicatedMergeTree arguments in Replicated database. If you really want to " - "specify them explicitly, enable setting " - "database_replicated_allow_replicated_engine_arguments."); - } - - if (maybe_shard_macros && maybe_replica_macros) - return; - - if (enable_functional_tests_helper && !has_expression_argument) - { - if (maybe_path.empty() || maybe_path.back() != '/') - maybe_path += '/'; - args_ref[0]->as()->value = maybe_path + "auto_{shard}"; - args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; - return; - } - - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. 
" - "If you really want to specify it explicitly, then you should use some macros " - "to distinguish different shards and replicas"); } } @@ -827,6 +760,85 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ } } +void DatabaseReplicated::checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const +{ + bool replicated_table = storage.engine && + (startsWith(storage.engine->name, "Replicated") || startsWith(storage.engine->name, "Shared")); + if (!replicated_table || !storage.engine->arguments) + return; + + ASTs & args_ref = storage.engine->arguments->children; + ASTs args = args_ref; + if (args.size() < 2) + return; + + /// It can be a constant expression. Try to evaluate it, ignore exception if we cannot. + bool has_expression_argument = args_ref[0]->as() || args_ref[1]->as(); + if (has_expression_argument) + { + try + { + args[0] = evaluateConstantExpressionAsLiteral(args_ref[0]->clone(), query_context); + args[1] = evaluateConstantExpressionAsLiteral(args_ref[1]->clone(), query_context); + } + catch (...) // NOLINT(bugprone-empty-catch) + { + } + } + + ASTLiteral * arg1 = args[0]->as(); + ASTLiteral * arg2 = args[1]->as(); + if (!arg1 || !arg2 || arg1->value.getType() != Field::Types::String || arg2->value.getType() != Field::Types::String) + return; + + String maybe_path = arg1->value.get(); + String maybe_replica = arg2->value.get(); + + /// Looks like it's ReplicatedMergeTree with explicit zookeeper_path and replica_name arguments. + /// Let's ensure that some macros are used. + /// NOTE: we cannot check here that substituted values will be actually different on shards and replicas. 
+ + Macros::MacroExpansionInfo info; + info.table_id = {getDatabaseName(), query.getTable(), query.uuid}; + info.shard = getShardName(); + info.replica = getReplicaName(); + query_context->getMacros()->expand(maybe_path, info); + bool maybe_shard_macros = info.expanded_other; + info.expanded_other = false; + query_context->getMacros()->expand(maybe_replica, info); + bool maybe_replica_macros = info.expanded_other; + bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); + + if (!enable_functional_tests_helper) + { + if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) + LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + else + throw Exception(ErrorCodes::INCORRECT_QUERY, + "It's not allowed to specify explicit zookeeper_path and replica_name " + "for ReplicatedMergeTree arguments in Replicated database. If you really want to " + "specify them explicitly, enable setting " + "database_replicated_allow_replicated_engine_arguments."); + } + + if (maybe_shard_macros && maybe_replica_macros) + return; + + if (enable_functional_tests_helper && !has_expression_argument) + { + if (maybe_path.empty() || maybe_path.back() != '/') + maybe_path += '/'; + args_ref[0]->as()->value = maybe_path + "auto_{shard}"; + args_ref[1]->as()->value = maybe_replica + "auto_{replica}"; + return; + } + + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. 
" + "If you really want to specify it explicitly, then you should use some macros " + "to distinguish different shards and replicas"); +} + BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) { waitDatabaseStarted(); @@ -1312,11 +1324,9 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query); - bool is_materialized_view_with_inner_table = create.is_materialized_view && create.to_table_id.empty(); - create.setDatabase(getDatabaseName()); create.setTable(unescapeForFileName(node_name)); - create.attach = is_materialized_view_with_inner_table; + create.attach = create.is_materialized_view_with_inner_table(); return ast; } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index eab5b2ff931..8c3fa7c87f6 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -107,6 +107,7 @@ private: void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; + void checkTableEngine(const ASTCreateQuery & query, ASTStorage & storage, ContextPtr query_context) const; void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 84d7f0a587c..45e2881ae5c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -950,7 +950,7 @@ namespace throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines"); } - void 
setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); @@ -970,9 +970,6 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) return; - if (create.is_materialized_view && create.to_table_id) - return; - if (create.temporary) { /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. @@ -987,22 +984,44 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } if (!create.storage->engine) - { setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); - } checkTemporaryTableEngineName(create.storage->engine->name); return; } + if (create.is_materialized_view) + { + /// A materialized view with an external target doesn't need a table engine. + if (create.is_materialized_view_with_external_target()) + return; + + if (auto to_engine = create.getTargetInnerEngine(ViewTarget::To)) + { + /// This materialized view already has a storage definition. + if (!to_engine->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + setDefaultTableEngine(*to_engine, getContext()->getSettingsRef().default_table_engine.value); + } + return; + } + } + if (create.storage) { - /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + /// This table already has a storage definition. if (!create.storage->engine) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. 
setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + } return; } + /// We'll try to extract a storage definition from clause `AS`: + /// CREATE TABLE table_name AS other_table_name + std::shared_ptr storage_def; if (!create.as_table.empty()) { /// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table. @@ -1018,12 +1037,14 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); - if (as_create.is_materialized_view && as_create.to_table_id) + if (as_create.is_materialized_view_with_external_target()) + { throw Exception( ErrorCodes::INCORRECT_QUERY, - "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS `{}`\" instead", + "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS {}\" instead", qualified_name, - as_create.to_table_id.getQualifiedName()); + as_create.getTargetTableID(ViewTarget::To).getFullTableName()); + } if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); @@ -1034,18 +1055,37 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_dictionary) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name); - if (as_create.storage) - create.set(create.storage, as_create.storage->ptr()); + if (as_create.is_materialized_view) + { + storage_def = as_create.getTargetInnerEngine(ViewTarget::To); + } else if (as_create.as_table_function) + { create.set(create.as_table_function, as_create.as_table_function->ptr()); + return; + } + else if (as_create.storage) + { + storage_def = typeid_cast>(as_create.storage->ptr()); + } else + { throw Exception(ErrorCodes::LOGICAL_ERROR, 
"Cannot set engine, it's a bug."); - - return; + } } - create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); + if (!storage_def) + { + /// Set ENGINE by default. + storage_def = std::make_shared(); + setDefaultTableEngine(*storage_def, getContext()->getSettingsRef().default_table_engine.value); + } + + /// Use the found table engine to modify the create query. + if (create.is_materialized_view) + create.setTargetInnerEngine(ViewTarget::To, storage_def); + else + create.set(create.storage, storage_def); } void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const @@ -1087,11 +1127,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data kind_upper, create.table); } - create.generateRandomUUID(); + create.generateRandomUUIDs(); } else { - bool has_uuid = create.uuid != UUIDHelpers::Nil || create.to_inner_uuid != UUIDHelpers::Nil; + bool has_uuid = (create.uuid != UUIDHelpers::Nil) || (create.targets && create.targets->hasInnerUUIDs()); if (has_uuid && !is_on_cluster && !internal) { /// We don't show the following error message either @@ -1106,8 +1146,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts. 
- create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + create.resetUUIDs(); } } @@ -1131,6 +1170,14 @@ void checkTableCanBeAddedWithNoCyclicDependencies(const ASTCreateQuery & create, DatabaseCatalog::instance().checkTableCanBeAddedWithNoCyclicDependencies(qualified_name, ref_dependencies, loading_dependencies); } +bool isReplicated(const ASTStorage & storage) +{ + if (!storage.engine) + return false; + const auto & storage_name = storage.engine->name; + return storage_name.starts_with("Replicated") || storage_name.starts_with("Shared"); +} + } BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) @@ -1247,8 +1294,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!create.temporary && !create.database) create.setDatabase(current_database); - if (create.to_table_id && create.to_table_id.database_name.empty()) - create.to_table_id.database_name = current_database; + + if (create.targets) + create.targets->setCurrentDatabase(current_database); if (create.select && create.isView()) { @@ -1282,12 +1330,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) TableProperties properties = getTablePropertiesAndNormalizeCreateQuery(create, mode); /// Check type compatible for materialized dest table and select columns - if (create.select && create.is_materialized_view && create.to_table_id && mode <= LoadingStrictnessLevel::CREATE) + if (create.is_materialized_view_with_external_target() && create.select && mode <= LoadingStrictnessLevel::CREATE) { - if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable( - {create.to_table_id.database_name, create.to_table_id.table_name, create.to_table_id.uuid}, - getContext() - )) + if (StoragePtr to_table = DatabaseCatalog::instance().tryGetTable(create.getTargetTableID(ViewTarget::To), getContext())) { Block input_block; @@ -1333,11 +1378,17 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!allow_heavy_create && 
database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; - if (create.storage && create.storage->engine) + + if (create.storage && isReplicated(*create.storage)) + is_storage_replicated = true; + + if (create.targets) { - const auto & storage_name = create.storage->engine->name; - if (storage_name.starts_with("Replicated") || storage_name.starts_with("Shared")) - is_storage_replicated = true; + for (auto inner_table_engine : create.targets->getInnerEngines()) + { + if (isReplicated(*inner_table_engine)) + is_storage_replicated = true; + } } const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; @@ -1791,7 +1842,7 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont /// For CREATE query generate UUID on initiator, so it will be the same on all hosts. /// It will be ignored if database does not support UUIDs. - create.generateRandomUUID(); + create.generateRandomUUIDs(); /// For cross-replication cluster we cannot use UUID in replica path. 
String cluster_name_expanded = local_context->getMacros()->expand(cluster_name); @@ -1913,8 +1964,15 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const } } - if (create.to_table_id) - required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name); + if (create.targets) + { + for (const auto & target : create.targets->targets) + { + const auto & target_id = target.table_id; + if (target_id) + required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, target_id.database_name, target_id.table_name); + } + } if (create.storage && create.storage->engine) required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 0fca7b64d5a..16add79d226 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -94,7 +94,8 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() { auto & create = create_query->as(); create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; + if (create.targets) + create.targets->resetInnerUUIDs(); } MutableColumnPtr column = ColumnString::create(); diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index d56a2724914..770a63c6e75 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -240,12 +242,12 @@ ASTPtr ASTCreateQuery::clone() const res->set(res->columns_list, columns_list->clone()); if (storage) res->set(res->storage, storage->clone()); - if (inner_storage) - res->set(res->inner_storage, inner_storage->clone()); if (select) res->set(res->select, select->clone()); if (table_overrides) res->set(res->table_overrides, table_overrides->clone()); + if (targets) + res->set(res->targets, 
targets->clone()); if (dictionary) { @@ -388,20 +390,18 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat refresh_strategy->formatImpl(settings, state, frame); } - if (to_table_id) + if (auto to_table_id = getTargetTableID(ViewTarget::To)) { - assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil); - settings.ostr - << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "") - << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") - << backQuoteIfNeed(to_table_id.table_name); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO) + << (settings.hilite ? hilite_none : "") << " " + << (!to_table_id.database_name.empty() ? backQuoteIfNeed(to_table_id.database_name) + "." : "") + << backQuoteIfNeed(to_table_id.table_name); } - if (to_inner_uuid != UUIDHelpers::Nil) + if (auto to_inner_uuid = getTargetInnerUUID(ViewTarget::To); to_inner_uuid != UUIDHelpers::Nil) { - assert(is_materialized_view && !to_table_id); - settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO INNER UUID " << (settings.hilite ? hilite_none : "") - << quoteString(toString(to_inner_uuid)); + settings.ostr << " " << (settings.hilite ? hilite_keyword : "") << toStringView(Keyword::TO_INNER_UUID) + << (settings.hilite ? hilite_none : "") << " " << quoteString(toString(to_inner_uuid)); } bool should_add_empty = is_create_empty; @@ -461,14 +461,17 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat frame.expression_list_always_start_on_new_line = false; - if (inner_storage) + if (storage) + storage->formatImpl(settings, state, frame); + + if (auto inner_storage = getTargetInnerEngine(ViewTarget::Inner)) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " INNER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << (settings.hilite ? 
hilite_keyword : "") << toStringView(Keyword::INNER) << (settings.hilite ? hilite_none : ""); inner_storage->formatImpl(settings, state, frame); } - if (storage) - storage->formatImpl(settings, state, frame); + if (auto to_storage = getTargetInnerEngine(ViewTarget::To)) + to_storage->formatImpl(settings, state, frame); if (dictionary) dictionary->formatImpl(settings, state, frame); @@ -528,48 +531,50 @@ bool ASTCreateQuery::isParameterizedView() const } -ASTCreateQuery::UUIDs::UUIDs(const ASTCreateQuery & query) - : uuid(query.uuid) - , to_inner_uuid(query.to_inner_uuid) +void ASTCreateQuery::generateRandomUUIDs() { + CreateQueryUUIDs{*this, /* generate_random= */ true}.copyToQuery(*this); } -String ASTCreateQuery::UUIDs::toString() const +void ASTCreateQuery::resetUUIDs() { - WriteBufferFromOwnString out; - out << "{" << uuid << "," << to_inner_uuid << "}"; - return out.str(); + CreateQueryUUIDs{}.copyToQuery(*this); } -ASTCreateQuery::UUIDs ASTCreateQuery::UUIDs::fromString(const String & str) + +StorageID ASTCreateQuery::getTargetTableID(ViewTarget::Kind target_kind) const { - ReadBufferFromString in{str}; - ASTCreateQuery::UUIDs res; - in >> "{" >> res.uuid >> "," >> res.to_inner_uuid >> "}"; - return res; + if (targets) + return targets->getTableID(target_kind); + return StorageID::createEmpty(); } -ASTCreateQuery::UUIDs ASTCreateQuery::generateRandomUUID(bool always_generate_new_uuid) +bool ASTCreateQuery::hasTargetTableID(ViewTarget::Kind target_kind) const { - if (always_generate_new_uuid) - setUUID({}); - - if (uuid == UUIDHelpers::Nil) - uuid = UUIDHelpers::generateV4(); - - /// If destination table (to_table_id) is not specified for materialized view, - /// then MV will create inner table. We should generate UUID of inner table here. 
- bool need_uuid_for_inner_table = !attach && is_materialized_view && !to_table_id; - if (need_uuid_for_inner_table && (to_inner_uuid == UUIDHelpers::Nil)) - to_inner_uuid = UUIDHelpers::generateV4(); - - return UUIDs{*this}; + if (targets) + return targets->hasTableID(target_kind); + return false; } -void ASTCreateQuery::setUUID(const UUIDs & uuids) +UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const { - uuid = uuids.uuid; - to_inner_uuid = uuids.to_inner_uuid; + if (targets) + return targets->getInnerUUID(target_kind); + return UUIDHelpers::Nil; +} + +std::shared_ptr ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const +{ + if (targets) + return targets->getInnerEngine(target_kind); + return nullptr; +} + +void ASTCreateQuery::setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def) +{ + if (!targets) + set(targets, std::make_shared()); + targets->setInnerEngine(target_kind, storage_def); } } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 6fbf045915b..f751a09169c 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace DB class ASTFunction; class ASTSetQuery; class ASTSelectWithUnionQuery; +struct CreateQueryUUIDs; class ASTStorage : public IAST @@ -101,17 +103,15 @@ public: bool has_uuid{false}; // CREATE TABLE x UUID '...' ASTColumns * columns_list = nullptr; - - StorageID to_table_id = StorageID::createEmpty(); /// For CREATE MATERIALIZED VIEW mv TO table. 
- UUID to_inner_uuid = UUIDHelpers::Nil; /// For materialized view with inner table - ASTStorage * inner_storage = nullptr; /// For window view with inner table ASTStorage * storage = nullptr; + ASTPtr watermark_function; ASTPtr lateness_function; String as_database; String as_table; IAST * as_table_function = nullptr; ASTSelectWithUnionQuery * select = nullptr; + ASTViewTargets * targets = nullptr; IAST * comment = nullptr; ASTPtr sql_security = nullptr; @@ -153,17 +153,25 @@ public: QueryKind getQueryKind() const override { return QueryKind::Create; } - struct UUIDs - { - UUID uuid = UUIDHelpers::Nil; - UUID to_inner_uuid = UUIDHelpers::Nil; - UUIDs() = default; - explicit UUIDs(const ASTCreateQuery & query); - String toString() const; - static UUIDs fromString(const String & str); - }; - UUIDs generateRandomUUID(bool always_generate_new_uuid = false); - void setUUID(const UUIDs & uuids); + /// Generates a random UUID for this create query if it's not specified already. + /// The function also generates random UUIDs for inner target tables if this create query implies that + /// (for example, if it's a `CREATE MATERIALIZED VIEW` query with an inner storage). + void generateRandomUUIDs(); + + /// Removes UUID from this create query. + /// The function also removes UUIDs for inner target tables from this create query (see also generateRandomUUID()). + void resetUUIDs(); + + /// Returns information about a target table. + /// If that information isn't specified in this create query (or even not allowed) then the function returns an empty value. 
+ StorageID getTargetTableID(ViewTarget::Kind target_kind) const; + bool hasTargetTableID(ViewTarget::Kind target_kind) const; + UUID getTargetInnerUUID(ViewTarget::Kind target_kind) const; + std::shared_ptr getTargetInnerEngine(ViewTarget::Kind target_kind) const; + void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def); + + bool is_materialized_view_with_external_target() const { return is_materialized_view && hasTargetTableID(ViewTarget::To); } + bool is_materialized_view_with_inner_table() const { return is_materialized_view && !hasTargetTableID(ViewTarget::To); } protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; @@ -171,8 +179,8 @@ protected: void forEachPointerToChild(std::function f) override { f(reinterpret_cast(&columns_list)); - f(reinterpret_cast(&inner_storage)); f(reinterpret_cast(&storage)); + f(reinterpret_cast(&targets)); f(reinterpret_cast(&as_table_function)); f(reinterpret_cast(&select)); f(reinterpret_cast(&comment)); diff --git a/src/Parsers/ASTViewTargets.cpp b/src/Parsers/ASTViewTargets.cpp new file mode 100644 index 00000000000..38f103b6e55 --- /dev/null +++ b/src/Parsers/ASTViewTargets.cpp @@ -0,0 +1,300 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +std::string_view toString(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return "to"; + case ViewTarget::Inner: return "inner"; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "{} doesn't support kind {}", __FUNCTION__, kind); +} + +void parseFromString(ViewTarget::Kind & out, std::string_view str) +{ + for (auto kind : magic_enum::enum_values()) + { + if (toString(kind) == str) + { + out = kind; + return; + } + } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: Unexpected string {}", __FUNCTION__, str); +} + + +std::vector ASTViewTargets::getKinds() 
const +{ + std::vector kinds; + kinds.reserve(targets.size()); + for (auto & target : targets) + kinds.push_back(target.kind); + return kinds; +} + + +void ASTViewTargets::setTableID(ViewTarget::Kind kind, const StorageID & table_id_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.table_id = table_id_; + return; + } + } + if (table_id_) + targets.emplace_back(kind).table_id = table_id_; +} + +StorageID ASTViewTargets::getTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->table_id; + return StorageID::createEmpty(); +} + +bool ASTViewTargets::hasTableID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return !target->table_id.empty(); + return false; +} + +void ASTViewTargets::setCurrentDatabase(const String & current_database) +{ + for (auto & target : targets) + { + auto & table_id = target.table_id; + if (!table_id.table_name.empty() && table_id.database_name.empty()) + table_id.database_name = current_database; + } +} + +void ASTViewTargets::setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_) +{ + for (auto & target : targets) + { + if (target.kind == kind) + { + target.inner_uuid = inner_uuid_; + return; + } + } + if (inner_uuid_ != UUIDHelpers::Nil) + targets.emplace_back(kind).inner_uuid = inner_uuid_; +} + +UUID ASTViewTargets::getInnerUUID(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_uuid; + return UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUID(ViewTarget::Kind kind) const +{ + return getInnerUUID(kind) != UUIDHelpers::Nil; +} + +void ASTViewTargets::resetInnerUUIDs() +{ + for (auto & target : targets) + target.inner_uuid = UUIDHelpers::Nil; +} + +bool ASTViewTargets::hasInnerUUIDs() const +{ + for (auto & target : targets) + { + if (target.inner_uuid != UUIDHelpers::Nil) + return true; + } + return false; +} + +void 
ASTViewTargets::setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def) +{ + auto new_inner_engine = typeid_cast>(storage_def); + if (!new_inner_engine && storage_def) + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Bad cast from type {} to ASTStorage", storage_def->getID()); + + for (auto & target : targets) + { + if (target.kind == kind) + { + if (target.inner_engine == new_inner_engine) + return; + if (new_inner_engine) + children.push_back(new_inner_engine); + if (target.inner_engine) + std::erase(children, target.inner_engine); + target.inner_engine = new_inner_engine; + return; + } + } + + if (new_inner_engine) + { + targets.emplace_back(kind).inner_engine = new_inner_engine; + children.push_back(new_inner_engine); + } +} + +std::shared_ptr ASTViewTargets::getInnerEngine(ViewTarget::Kind kind) const +{ + if (const auto * target = tryGetTarget(kind)) + return target->inner_engine; + return nullptr; +} + +std::vector> ASTViewTargets::getInnerEngines() const +{ + std::vector> res; + res.reserve(targets.size()); + for (const auto & target : targets) + { + if (target.inner_engine) + res.push_back(target.inner_engine); + } + return res; +} + +const ViewTarget * ASTViewTargets::tryGetTarget(ViewTarget::Kind kind) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + return ⌖ + } + return nullptr; +} + +ASTPtr ASTViewTargets::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + for (auto & target : res->targets) + { + if (target.inner_engine) + { + target.inner_engine = typeid_cast>(target.inner_engine->clone()); + res->children.push_back(target.inner_engine); + } + } + return res; +} + +void ASTViewTargets::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + for (const auto & target : targets) + formatTarget(target, s, state, frame); +} + +void ASTViewTargets::formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked 
frame) const +{ + for (const auto & target : targets) + { + if (target.kind == kind) + formatTarget(target, s, state, frame); + } +} + +void ASTViewTargets::formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame) +{ + if (target.table_id) + { + auto keyword = getKeywordForTableID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No keyword for table name of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " + << (!target.table_id.database_name.empty() ? backQuoteIfNeed(target.table_id.database_name) + "." : "") + << backQuoteIfNeed(target.table_id.table_name); + } + + if (target.inner_uuid != UUIDHelpers::Nil) + { + auto keyword = getKeywordForInnerUUID(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for inner UUID of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) + << (s.hilite ? hilite_none : "") << " " << quoteString(toString(target.inner_uuid)); + } + + if (target.inner_engine) + { + auto keyword = getKeywordForInnerStorage(target.kind); + if (!keyword) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No prefix keyword for table engine of kind {}", toString(target.kind)); + s.ostr << " " << (s.hilite ? hilite_keyword : "") << toStringView(*keyword) << (s.hilite ? 
hilite_none : ""); + target.inner_engine->formatImpl(s, state, frame); + } +} + +std::optional ASTViewTargets::getKeywordForTableID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO; /// TO mydb.mydata + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerStorage(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return std::nullopt; /// ENGINE = MergeTree() + case ViewTarget::Inner: return Keyword::INNER; /// INNER ENGINE = MergeTree() + } + UNREACHABLE(); +} + +std::optional ASTViewTargets::getKeywordForInnerUUID(ViewTarget::Kind kind) +{ + switch (kind) + { + case ViewTarget::To: return Keyword::TO_INNER_UUID; /// TO INNER UUID 'XXX' + case ViewTarget::Inner: return std::nullopt; + } + UNREACHABLE(); +} + +void ASTViewTargets::forEachPointerToChild(std::function f) +{ + for (auto & target : targets) + { + if (target.inner_engine) + { + ASTStorage * new_inner_engine = target.inner_engine.get(); + f(reinterpret_cast(&new_inner_engine)); + if (new_inner_engine != target.inner_engine.get()) + { + if (new_inner_engine) + target.inner_engine = typeid_cast>(new_inner_engine->ptr()); + else + target.inner_engine.reset(); + } + } + } +} + +} diff --git a/src/Parsers/ASTViewTargets.h b/src/Parsers/ASTViewTargets.h new file mode 100644 index 00000000000..33a7bc5fcb1 --- /dev/null +++ b/src/Parsers/ASTViewTargets.h @@ -0,0 +1,102 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ASTStorage; +enum class Keyword : size_t; + +/// Information about the target table for a materialized view or a window view. +struct ViewTarget +{ + enum Kind + { + /// Target table for a materialized view or a window view. + To, + + /// Table with intermediate results for a window view. + Inner, + }; + + Kind kind = To; + + /// StorageID of the target table, if it's not inner. 
+ /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + StorageID table_id = StorageID::createEmpty(); + + /// UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + UUID inner_uuid = UUIDHelpers::Nil; + + /// Table engine of the target table, if it's inner. + /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + std::shared_ptr inner_engine; +}; + +/// Converts ViewTarget::Kind to a string. +std::string_view toString(ViewTarget::Kind kind); +void parseFromString(ViewTarget::Kind & out, std::string_view str); + + +/// Information about all the target tables for a view. +class ASTViewTargets : public IAST +{ +public: + std::vector targets; + + /// Sets the StorageID of the target table, if it's not inner. + /// That storage ID can be seen for example after "TO" in a statement like CREATE MATERIALIZED VIEW ... TO ... + void setTableID(ViewTarget::Kind kind, const StorageID & table_id_); + StorageID getTableID(ViewTarget::Kind kind) const; + bool hasTableID(ViewTarget::Kind kind) const; + + /// Replaces an empty database in the StorageID of the target table with a specified database. + void setCurrentDatabase(const String & current_database); + + /// Sets the UUID of the target table, if it's inner. + /// The UUID is calculated automatically and can be seen for example after "TO INNER UUID" in a statement like + /// CREATE MATERIALIZED VIEW ... TO INNER UUID ... + void setInnerUUID(ViewTarget::Kind kind, const UUID & inner_uuid_); + UUID getInnerUUID(ViewTarget::Kind kind) const; + bool hasInnerUUID(ViewTarget::Kind kind) const; + + void resetInnerUUIDs(); + bool hasInnerUUIDs() const; + + /// Sets the table engine of the target table, if it's inner. 
+ /// That engine can be seen for example after "ENGINE" in a statement like CREATE MATERIALIZED VIEW ... ENGINE ... + void setInnerEngine(ViewTarget::Kind kind, ASTPtr storage_def); + std::shared_ptr getInnerEngine(ViewTarget::Kind kind) const; + std::vector> getInnerEngines() const; + + /// Returns a list of all kinds of views in this ASTViewTargets. + std::vector getKinds() const; + + /// Returns information about a target table. + /// The function returns null if such target doesn't exist. + const ViewTarget * tryGetTarget(ViewTarget::Kind kind) const; + + String getID(char) const override { return "ViewTargets"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + /// Formats information only about a specific target table. + void formatTarget(ViewTarget::Kind kind, const FormatSettings & s, FormatState & state, FormatStateStacked frame) const; + static void formatTarget(const ViewTarget & target, const FormatSettings & s, FormatState & state, FormatStateStacked frame); + + /// Helper functions for class ParserViewTargets. Returns a prefix keyword matching a specified target kind. 
+ static std::optional getKeywordForTableID(ViewTarget::Kind kind); + static std::optional getKeywordForInnerUUID(ViewTarget::Kind kind); + static std::optional getKeywordForInnerStorage(ViewTarget::Kind kind); + +protected: + void forEachPointerToChild(std::function f) override; +}; + +} diff --git a/src/Parsers/CreateQueryUUIDs.cpp b/src/Parsers/CreateQueryUUIDs.cpp new file mode 100644 index 00000000000..4dfee67b537 --- /dev/null +++ b/src/Parsers/CreateQueryUUIDs.cpp @@ -0,0 +1,168 @@ +#include + +#include +#include +#include +#include + + +namespace DB +{ + +CreateQueryUUIDs::CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random, bool force_random) +{ + if (!generate_random || !force_random) + { + uuid = query.uuid; + if (query.targets) + { + for (const auto & target : query.targets->targets) + setTargetInnerUUID(target.kind, target.inner_uuid); + } + } + + if (generate_random) + { + if (uuid == UUIDHelpers::Nil) + uuid = UUIDHelpers::generateV4(); + + /// For an ATTACH query we should never generate UUIDs for its inner target tables + /// because for an ATTACH query those inner target tables probably already exist and can be accessible by names. + /// If we generate random UUIDs for already existing tables then those UUIDs will not be correct making those inner target table inaccessible. + /// Thus it's not safe for example to replace + /// "ATTACH MATERIALIZED VIEW mv AS SELECT a FROM b" with + /// "ATTACH MATERIALIZED VIEW mv TO INNER UUID "XXXX" AS SELECT a FROM b" + /// This replacement is safe only for CREATE queries when inner target tables don't exist yet. 
+ if (!query.attach) + { + auto generate_target_uuid = [&](ViewTarget::Kind target_kind) + { + if ((query.getTargetInnerUUID(target_kind) == UUIDHelpers::Nil) && query.getTargetTableID(target_kind).empty()) + setTargetInnerUUID(target_kind, UUIDHelpers::generateV4()); + }; + + /// If destination table (to_table_id) is not specified for materialized view, + /// then MV will create inner table. We should generate UUID of inner table here. + if (query.is_materialized_view) + generate_target_uuid(ViewTarget::To); + } + } +} + +bool CreateQueryUUIDs::empty() const +{ + if (uuid != UUIDHelpers::Nil) + return false; + for (const auto & [_, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + return false; + } + return true; +} + +String CreateQueryUUIDs::toString() const +{ + WriteBufferFromOwnString out; + out << "{"; + bool need_comma = false; + auto add_name_and_uuid_to_string = [&](std::string_view name_, const UUID & uuid_) + { + if (std::exchange(need_comma, true)) + out << ", "; + out << "\"" << name_ << "\": \"" << uuid_ << "\""; + }; + if (uuid != UUIDHelpers::Nil) + add_name_and_uuid_to_string("uuid", uuid); + for (const auto & [kind, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + add_name_and_uuid_to_string(::DB::toString(kind), inner_uuid); + } + out << "}"; + return out.str(); +} + +CreateQueryUUIDs CreateQueryUUIDs::fromString(const String & str) +{ + ReadBufferFromString in{str}; + CreateQueryUUIDs res; + skipWhitespaceIfAny(in); + in >> "{"; + skipWhitespaceIfAny(in); + char c; + while (in.peek(c) && c != '}') + { + String name; + String value; + readDoubleQuotedString(name, in); + skipWhitespaceIfAny(in); + in >> ":"; + skipWhitespaceIfAny(in); + readDoubleQuotedString(value, in); + skipWhitespaceIfAny(in); + if (name == "uuid") + { + res.uuid = parse(value); + } + else + { + ViewTarget::Kind kind; + parseFromString(kind, name); + res.setTargetInnerUUID(kind, parse(value)); + } + if (in.peek(c) && 
c == ',') + { + in.ignore(1); + skipWhitespaceIfAny(in); + } + } + in >> "}"; + return res; +} + +void CreateQueryUUIDs::setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid) +{ + for (auto & pair : targets_inner_uuids) + { + if (pair.first == kind) + { + pair.second = new_inner_uuid; + return; + } + } + if (new_inner_uuid != UUIDHelpers::Nil) + targets_inner_uuids.emplace_back(kind, new_inner_uuid); +} + +UUID CreateQueryUUIDs::getTargetInnerUUID(ViewTarget::Kind kind) const +{ + for (const auto & pair : targets_inner_uuids) + { + if (pair.first == kind) + return pair.second; + } + return UUIDHelpers::Nil; +} + +void CreateQueryUUIDs::copyToQuery(ASTCreateQuery & query) const +{ + query.uuid = uuid; + + if (query.targets) + query.targets->resetInnerUUIDs(); + + if (!targets_inner_uuids.empty()) + { + if (!query.targets) + query.set(query.targets, std::make_shared()); + + for (const auto & [kind, inner_uuid] : targets_inner_uuids) + { + if (inner_uuid != UUIDHelpers::Nil) + query.targets->setInnerUUID(kind, inner_uuid); + } + } +} + +} diff --git a/src/Parsers/CreateQueryUUIDs.h b/src/Parsers/CreateQueryUUIDs.h new file mode 100644 index 00000000000..419dad24b35 --- /dev/null +++ b/src/Parsers/CreateQueryUUIDs.h @@ -0,0 +1,40 @@ +#pragma once + +#include + + +namespace DB +{ +class ASTCreateQuery; + +/// The UUID of a table or a database defined with a CREATE QUERY along with the UUIDs of its inner targets. +struct CreateQueryUUIDs +{ + CreateQueryUUIDs() = default; + + /// Collect UUIDs from ASTCreateQuery. + /// Parameters: + /// `generate_random` - if it's true then unspecified in the query UUIDs will be generated randomly; + /// `force_random` - if it's true then all UUIDs (even specified in the query) will be (re)generated randomly. 
+ explicit CreateQueryUUIDs(const ASTCreateQuery & query, bool generate_random = false, bool force_random = false); + + bool empty() const; + explicit operator bool() const { return !empty(); } + + String toString() const; + static CreateQueryUUIDs fromString(const String & str); + + void setTargetInnerUUID(ViewTarget::Kind kind, const UUID & new_inner_uuid); + UUID getTargetInnerUUID(ViewTarget::Kind kind) const; + + /// Copies UUIDs to ASTCreateQuery. + void copyToQuery(ASTCreateQuery & query) const; + + /// UUID of the table. + UUID uuid = UUIDHelpers::Nil; + + /// UUIDs of its target table (or tables). + std::vector> targets_inner_uuids; +}; + +} diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 014dc7bd3bf..41379a845e7 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -693,7 +694,8 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ASTPtr table; ASTPtr columns_list; - ASTPtr storage; + std::shared_ptr storage; + ASTPtr targets; ASTPtr as_database; ASTPtr as_table; ASTPtr as_table_function; @@ -773,6 +775,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return true; } + auto parse_storage = [&] + { + chassert(!storage); + ASTPtr ast; + if (!storage_p.parse(pos, ast, expected)) + return false; + + storage = typeid_cast>(ast); + return true; + }; + auto need_parse_as_select = [&is_create_empty, &pos, &expected]() { if (ParserKeyword{Keyword::EMPTY_AS}.ignore(pos, expected)) @@ -798,7 +811,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!s_rparen.ignore(pos, expected)) return false; - auto storage_parse_result = storage_p.parse(pos, storage, expected); + auto storage_parse_result = parse_storage(); if ((storage_parse_result || is_temporary) && need_parse_as_select()) { @@ -820,7 +833,7 @@ bool 
ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe */ else { - storage_p.parse(pos, storage, expected); + parse_storage(); /// CREATE|ATTACH TABLE ... AS ... if (need_parse_as_select()) @@ -843,7 +856,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// Optional - ENGINE can be specified. if (!storage) - storage_p.parse(pos, storage, expected); + parse_storage(); } } } @@ -904,6 +917,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); query->is_create_empty = is_create_empty; if (from_path) @@ -977,6 +991,13 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return false; } + std::shared_ptr targets; + if (to_table) + { + targets = std::make_shared(); + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + } + /// Optional - a list of columns can be specified. It must fully comply with SELECT. 
if (s_lparen.ignore(pos, expected)) { @@ -1017,14 +1038,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - query->set(query->columns_list, columns_list); tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); if (comment) query->set(query->comment, comment); @@ -1139,6 +1158,18 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & storage_p.parse(pos, storage, expected); } + std::shared_ptr targets; + if (to_table || storage || inner_storage) + { + targets = std::make_shared(); + if (to_table) + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + if (storage) + targets->setInnerEngine(ViewTarget::To, storage); + if (inner_storage) + targets->setInnerEngine(ViewTarget::Inner, inner_storage); + } + // WATERMARK if (s_watermark.ignore(pos, expected)) { @@ -1195,12 +1226,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - query->set(query->columns_list, columns_list); - query->set(query->storage, storage); - query->set(query->inner_storage, inner_storage); + query->is_watermark_strictly_ascending = is_watermark_strictly_ascending; query->is_watermark_ascending = is_watermark_ascending; query->is_watermark_bounded = is_watermark_bounded; @@ -1213,6 +1240,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); return true; } @@ -1436,6 +1464,7 @@ bool 
ParserCreateDatabaseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e return true; } + bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create(Keyword::CREATE); @@ -1622,13 +1651,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (query->table) query->children.push_back(query->table); - if (to_table) - query->to_table_id = to_table->as()->getTableId(); - if (to_inner_uuid) - query->to_inner_uuid = parseFromString(to_inner_uuid->as()->value.get()); - query->set(query->columns_list, columns_list); - query->set(query->storage, storage); + if (refresh_strategy) query->set(query->refresh_strategy, refresh_strategy); if (comment) @@ -1639,29 +1663,41 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (query->columns_list && query->columns_list->primary_key) { /// If engine is not set will use default one - if (!query->storage) - query->set(query->storage, std::make_shared()); - else if (query->storage->primary_key) + if (!storage) + storage = std::make_shared(); + auto & storage_ref = typeid_cast(*storage); + if (storage_ref.primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); - - query->storage->primary_key = query->columns_list->primary_key; - + storage_ref.primary_key = query->columns_list->primary_key; } if (query->columns_list && (query->columns_list->primary_key_from_columns)) { /// If engine is not set will use default one - if (!query->storage) - query->set(query->storage, std::make_shared()); - else if (query->storage->primary_key) + if (!storage) + storage = std::make_shared(); + auto & storage_ref = typeid_cast(*storage); + if (storage_ref.primary_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + storage_ref.primary_key = query->columns_list->primary_key_from_columns; + } - query->storage->primary_key = 
query->columns_list->primary_key_from_columns; + std::shared_ptr targets; + if (to_table || to_inner_uuid || storage) + { + targets = std::make_shared(); + if (to_table) + targets->setTableID(ViewTarget::To, to_table->as()->getTableId()); + if (to_inner_uuid) + targets->setInnerUUID(ViewTarget::To, parseFromString(to_inner_uuid->as()->value.safeGet())); + if (storage) + targets->setInnerEngine(ViewTarget::To, storage); } tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); + query->set(query->targets, targets); return true; } diff --git a/src/Parsers/ParserViewTargets.cpp b/src/Parsers/ParserViewTargets.cpp new file mode 100644 index 00000000000..8f010882cdd --- /dev/null +++ b/src/Parsers/ParserViewTargets.cpp @@ -0,0 +1,88 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +ParserViewTargets::ParserViewTargets() +{ + for (auto kind : magic_enum::enum_values()) + accept_kinds.push_back(kind); +} + +bool ParserViewTargets::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserStringLiteral literal_p; + ParserStorage storage_p{ParserStorage::TABLE_ENGINE}; + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); + + std::shared_ptr res; + + auto result = [&] -> ASTViewTargets & + { + if (!res) + res = std::make_shared(); + return *res; + }; + + for (;;) + { + auto start = pos; + for (auto kind : accept_kinds) + { + auto current = pos; + + auto keyword = ASTViewTargets::getKeywordForInnerUUID(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (literal_p.parse(pos, ast, expected)) + { + result().setInnerUUID(kind, parseFromString(ast->as()->value.safeGet())); + break; + } + } + pos = current; + + keyword = ASTViewTargets::getKeywordForInnerStorage(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + 
ASTPtr ast; + if (storage_p.parse(pos, ast, expected)) + { + result().setInnerEngine(kind, ast); + break; + } + } + pos = current; + + keyword = ASTViewTargets::getKeywordForTableID(kind); + if (keyword && ParserKeyword{*keyword}.ignore(pos, expected)) + { + ASTPtr ast; + if (table_name_p.parse(pos, ast, expected)) + { + result().setTableID(kind, ast->as()->getTableId()); + break; + } + } + pos = current; + } + if (pos == start) + break; + } + + if (!res || res->targets.empty()) + return false; + + node = res; + return true; +} + +} diff --git a/src/Parsers/ParserViewTargets.h b/src/Parsers/ParserViewTargets.h new file mode 100644 index 00000000000..f5d1850e974 --- /dev/null +++ b/src/Parsers/ParserViewTargets.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/// Parses information about target views of a table. +class ParserViewTargets : public IParserBase +{ +public: + ParserViewTargets(); + explicit ParserViewTargets(const std::vector & accept_kinds_) : accept_kinds(accept_kinds_) { } + +protected: + const char * getName() const override { return "ViewTargets"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + std::vector accept_kinds; +}; + +} diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 57d95a98f11..b603d0ecf87 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -93,11 +93,6 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); - auto * storage_def = query.storage; - if (storage_def && storage_def->primary_key) - storage_metadata.primary_key = KeyDescription::getKeyFromAST(storage_def->primary_key->ptr(), - storage_metadata.columns, - local_context->getGlobalContext()); if (query.sql_security) storage_metadata.setSQLSecurity(query.sql_security->as()); @@ -110,12 +105,21 @@ 
StorageMaterializedView::StorageMaterializedView( throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// If the destination table is not set, use inner table - has_inner_table = query.to_table_id.empty(); - if (has_inner_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_table = to_table_id.empty(); + auto to_inner_uuid = query.getTargetInnerUUID(ViewTarget::To); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a MaterializedView query: " "either ENGINE or an existing table in a TO clause"); + if (to_table_engine && to_table_engine->primary_key) + storage_metadata.primary_key = KeyDescription::getKeyFromAST(to_table_engine->primary_key->ptr(), + storage_metadata.columns, + local_context->getGlobalContext()); + auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), query.refresh_strategy != nullptr, local_context); if (select.select_table_id) { @@ -135,25 +139,25 @@ StorageMaterializedView::StorageMaterializedView( setInMemoryMetadata(storage_metadata); - bool point_to_itself_by_uuid = has_inner_table && query.to_inner_uuid != UUIDHelpers::Nil - && query.to_inner_uuid == table_id_.uuid; - bool point_to_itself_by_name = !has_inner_table && query.to_table_id.database_name == table_id_.database_name - && query.to_table_id.table_name == table_id_.table_name; + bool point_to_itself_by_uuid = has_inner_table && to_inner_uuid != UUIDHelpers::Nil + && to_inner_uuid == table_id_.uuid; + bool point_to_itself_by_name = !has_inner_table && to_table_id.database_name == table_id_.database_name + && to_table_id.table_name == table_id_.table_name; if (point_to_itself_by_uuid || point_to_itself_by_name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", 
table_id_.getFullTableName()); if (!has_inner_table) { - target_table_id = query.to_table_id; + target_table_id = to_table_id; } else if (LoadingStrictnessLevel::ATTACH <= mode) { /// If there is an ATTACH request, then the internal table must already be created. - target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), query.to_inner_uuid); + target_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()), to_inner_uuid); } else { - const String & engine = query.storage->engine->name; + const String & engine = to_table_engine->engine->name; const auto & storage_features = StorageFactory::instance().getStorageFeatures(engine); /// We will create a query to create an internal table. @@ -161,8 +165,8 @@ StorageMaterializedView::StorageMaterializedView( auto manual_create_query = std::make_shared(); manual_create_query->setDatabase(getStorageID().database_name); manual_create_query->setTable(generateInnerTableName(getStorageID())); - manual_create_query->uuid = query.to_inner_uuid; - manual_create_query->has_uuid = query.to_inner_uuid != UUIDHelpers::Nil; + manual_create_query->uuid = to_inner_uuid; + manual_create_query->has_uuid = to_inner_uuid != UUIDHelpers::Nil; auto new_columns_list = std::make_shared(); new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); @@ -184,7 +188,9 @@ StorageMaterializedView::StorageMaterializedView( } manual_create_query->set(manual_create_query->columns_list, new_columns_list); - manual_create_query->set(manual_create_query->storage, query.storage->ptr()); + + if (to_table_engine) + manual_create_query->set(manual_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter(manual_create_query, create_context); create_interpreter.setInternal(true); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 783b899c978..14af3bad700 100644 --- 
a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -470,7 +470,8 @@ protected: if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) { ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; + if (ast_create->targets) + ast_create->targets->resetInnerUUIDs(); } if (columns_mask[src_index++]) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b842cdda022..2b1d39fd3b6 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1202,8 +1202,11 @@ StorageWindowView::StorageWindowView( setInMemoryMetadata(storage_metadata); /// If the target table is not set, use inner target table - has_inner_target_table = query.to_table_id.empty(); - if (has_inner_target_table && !query.storage) + auto to_table_id = query.getTargetTableID(ViewTarget::To); + has_inner_target_table = to_table_id.empty(); + auto to_table_engine = query.getTargetInnerEngine(ViewTarget::To); + + if (has_inner_target_table && !to_table_engine) throw Exception(ErrorCodes::INCORRECT_QUERY, "You must specify where to save results of a WindowView query: " "either ENGINE or an existing table in a TO clause"); @@ -1218,12 +1221,12 @@ StorageWindowView::StorageWindowView( auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as(), context_); - if (query.inner_storage) - inner_table_engine = query.inner_storage->clone(); + if (auto inner_storage = query.getTargetInnerEngine(ViewTarget::Inner)) + inner_table_engine = inner_storage->clone(); inner_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID())); inner_fetch_query = generateInnerFetchQuery(inner_table_id); - target_table_id = has_inner_target_table ? 
StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : query.to_table_id; + target_table_id = has_inner_target_table ? StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : to_table_id; if (is_proctime) next_fire_signal = getWindowUpperBound(now()); @@ -1248,7 +1251,7 @@ StorageWindowView::StorageWindowView( new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr()); target_create_query->set(target_create_query->columns_list, new_columns_list); - target_create_query->set(target_create_query->storage, query.storage->ptr()); + target_create_query->set(target_create_query->storage, to_table_engine); InterpreterCreateQuery create_interpreter_(target_create_query, create_context_); create_interpreter_.setInternal(true); From 5608914bca8b36920f8012fa48b6617512629cfe Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 17 Jul 2024 16:59:31 +0100 Subject: [PATCH 24/57] impl --- base/base/defines.h | 15 +++++++++++---- src/Analyzer/QueryTreePassManager.cpp | 6 +++--- src/Columns/ColumnAggregateFunction.cpp | 4 ++-- src/Columns/ColumnAggregateFunction.h | 6 +++--- src/Columns/ColumnArray.cpp | 6 +++--- src/Columns/ColumnArray.h | 6 +++--- src/Columns/ColumnCompressed.h | 4 ++-- src/Columns/ColumnConst.h | 8 ++++---- src/Columns/ColumnDecimal.cpp | 4 ++-- src/Columns/ColumnDecimal.h | 8 ++++---- src/Columns/ColumnDynamic.cpp | 8 ++++---- src/Columns/ColumnDynamic.h | 4 ++-- src/Columns/ColumnFixedString.cpp | 6 +++--- src/Columns/ColumnFixedString.h | 8 ++++---- src/Columns/ColumnFunction.cpp | 4 ++-- src/Columns/ColumnFunction.h | 6 +++--- src/Columns/ColumnLowCardinality.cpp | 6 +++--- src/Columns/ColumnLowCardinality.h | 6 +++--- src/Columns/ColumnMap.cpp | 8 ++++---- src/Columns/ColumnMap.h | 4 ++-- src/Columns/ColumnNullable.cpp | 8 ++++---- src/Columns/ColumnNullable.h | 6 +++--- src/Columns/ColumnObject.cpp | 4 ++-- src/Columns/ColumnObject.h | 4 ++-- src/Columns/ColumnSparse.cpp | 6 +++--- 
src/Columns/ColumnSparse.h | 6 +++--- src/Columns/ColumnString.cpp | 4 ++-- src/Columns/ColumnString.h | 8 ++++---- src/Columns/ColumnTuple.cpp | 8 ++++---- src/Columns/ColumnTuple.h | 6 +++--- src/Columns/ColumnUnique.h | 4 ++-- src/Columns/ColumnVariant.cpp | 8 ++++---- src/Columns/ColumnVariant.h | 4 ++-- src/Columns/ColumnVector.cpp | 2 +- src/Columns/ColumnVector.h | 8 ++++---- src/Columns/IColumn.cpp | 2 +- src/Columns/IColumn.h | 10 +++++----- src/Columns/IColumnDummy.h | 6 +++--- src/Columns/IColumnUnique.h | 2 +- .../benchmark_column_insert_many_from.cpp | 2 +- .../Config/AbstractConfigurationComparison.cpp | 2 +- src/Common/MemoryTracker.cpp | 2 +- src/Common/PageCache.cpp | 2 +- src/Common/assert_cast.h | 2 +- src/Common/tests/gtest_rw_lock.cpp | 2 +- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 6 +++--- src/IO/tests/gtest_memory_resize.cpp | 6 +++--- src/IO/tests/gtest_writebuffer_s3.cpp | 4 ++-- src/Interpreters/Cache/FileCache.cpp | 8 ++++---- src/Interpreters/Cache/FileSegment.cpp | 6 +++--- src/Interpreters/Cache/Metadata.cpp | 2 +- src/Interpreters/executeDDLQueryOnCluster.cpp | 2 +- .../gtest_exception_on_incorrect_pipeline.cpp | 2 +- .../tests/gtest_check_sorted_stream.cpp | 8 ++++---- src/Server/TCPHandler.cpp | 2 +- src/Storages/MaterializedView/RefreshTask.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- 57 files changed, 152 insertions(+), 145 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index 2fc54c37bde..cf3d357da18 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -87,10 +87,17 @@ # define ASAN_POISON_MEMORY_REGION(a, b) #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) - #if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) || defined(UNDEFINED_BEHAVIOR_SANITIZER) - #define ABORT_ON_LOGICAL_ERROR - #endif +/// We used to have only ABORT_ON_LOGICAL_ERROR macro, but most of its uses were actually in places where we didn't care about 
logical errors +/// but wanted to check exactly if the current build type is debug or with sanitizer. This new macro is introduced to fix those places. +#if !defined(DEBUG_OR_SANITIZER_BUILD) +# if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || defined(MEMORY_SANITIZER) \ + || defined(UNDEFINED_BEHAVIOR_SANITIZER) +# define DEBUG_OR_SANITIZER_BUILD +# endif +#endif + +#if !defined(ABORT_ON_LOGICAL_ERROR) && defined(DEBUG_OR_SANITIZER_BUILD) +# define ABORT_ON_LOGICAL_ERROR #endif /// chassert(x) is similar to assert(x), but: diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index f7919b6422c..4443f83596f 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -62,7 +62,7 @@ namespace ErrorCodes namespace { -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /** This visitor checks if Query Tree structure is valid after each pass * in debug build. @@ -183,7 +183,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node) for (size_t i = 0; i < passes_size; ++i) { passes[i]->run(query_tree_node, current_context); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) ValidationChecker(passes[i]->getName()).visit(query_tree_node); #endif } @@ -208,7 +208,7 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pa for (size_t i = 0; i < up_to_pass_index; ++i) { passes[i]->run(query_tree_node, current_context); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) ValidationChecker(passes[i]->getName()).visit(query_tree_node); #endif } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index cfd07c27765..955981a972d 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } -#if 
!defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) #else void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) @@ -466,7 +466,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) #else void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 1be7a862438..fe678fc1eaa 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,7 @@ public: void insertData(const char * pos, size_t length) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & from, size_t n) override; #else using IColumn::insertFrom; @@ -189,7 +189,7 @@ public: void protect() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; @@ -212,7 +212,7 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override #else int doCompareAt(size_t, size_t, const IColumn &, int) const override diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 5d7350f3a79..598d501a2b8 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -337,7 +337,7 @@ bool ColumnArray::tryInsert(const Field & x) return 
true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnArray::insertFrom(const IColumn & src_, size_t n) #else void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) @@ -396,7 +396,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const #else int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const @@ -543,7 +543,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 6cd3e2f6c3b..6f735fe9dc3 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,14 +84,14 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; #else void doInsertFrom(const IColumn & src_, size_t n) override; @@ -103,7 +103,7 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & 
indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 5e455709fec..10649602920 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,7 +85,7 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } @@ -104,7 +104,7 @@ public: void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index b55a1f42037..e419dbd2c4c 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -123,7 +123,7 @@ public: return data->isNullAt(0); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn 
&, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override @@ -151,7 +151,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -160,7 +160,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } #else void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } @@ -237,7 +237,7 @@ public: return data->allocatedBytes() + sizeof(s); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override #else int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index cf413f790a7..8e1a96b6ed2 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const #else int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const @@ -335,7 +335,7 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 32efeb643a6..3985a667135 100644 --- 
a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,13 +55,13 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } #else void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -76,7 +76,7 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -104,7 +104,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index c735238f515..a92d54dd675 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -215,7 +215,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } -#if 
!defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) #else void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) @@ -269,7 +269,7 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) #else void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) @@ -439,7 +439,7 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) #else void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) @@ -603,7 +603,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const #else int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 9abddc7a26d..fa8ec55c60a 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -143,7 +143,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, 
size_t length) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -220,7 +220,7 @@ public: return scattered_columns; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 1c2de203a94..db697f6372b 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) #else void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) @@ -90,7 +90,7 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -227,7 +227,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 6e88136fc50..6e7ff488f9a 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,13 
+98,13 @@ public: bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t index) override; #else void doInsertFrom(const IColumn & src_, size_t index) override; #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -137,7 +137,7 @@ public: void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override #else int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override @@ -156,7 +156,7 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index fa57f35a823..fc81efaac0c 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertFrom(const IColumn & src, size_t n) #else void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) @@ -93,7 +93,7 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) ++elements_size; } -#if 
!defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index ba924c49a82..dfc592ab281 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -94,12 +94,12 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; @@ -145,7 +145,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override #else int doCompareAt(size_t, size_t, const IColumn &, int) const override diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index eb694a10b0f..237e157f420 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -159,7 +159,7 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) #else void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) @@ -191,7 +191,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) 
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -372,7 +372,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e99be07cd8d..1b74518037f 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,14 +78,14 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; #endif void insertFromFullColumn(const IColumn & src, size_t n); -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -135,7 +135,7 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int 
doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 2dffddb2dc9..77bf8802ab4 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -153,7 +153,7 @@ void ColumnMap::updateHashFast(SipHash & hash) const nested->updateHashFast(hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertFrom(const IColumn & src, size_t n) #else void ColumnMap::doInsertFrom(const IColumn & src, size_t n) @@ -162,7 +162,7 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n) nested->insertFrom(assert_cast(src).getNestedColumn(), n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -171,7 +171,7 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -222,7 +222,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index a54071a2974..592f83732df 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -67,7 +67,7 @@ public: void 
updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -83,7 +83,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index f060e74b315..11447a7966e 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -221,7 +221,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -262,7 +262,7 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertFrom(const IColumn & src, size_t n) #else void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) @@ -274,7 +274,7 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnNullable::insertManyFrom(const IColumn & 
src, size_t position, size_t length) #else void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -414,7 +414,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const #else int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index a6d0483e527..cf7efba6235 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,7 +69,7 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -77,7 +77,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else @@ -100,7 +100,7 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const 
override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index adcd42b16e9..39e587368fe 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,7 +763,7 @@ void ColumnObject::get(size_t n, Field & res) const } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObject::insertFrom(const IColumn & src, size_t n) #else void ColumnObject::doInsertFrom(const IColumn & src, size_t n) @@ -772,7 +772,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n) insert(src[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index fadf2e18779..09c28d460e4 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -210,7 +210,7 @@ public: bool tryInsert(const Field & field) override; void insertDefault() override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else @@ -235,7 +235,7 @@ public: /// Order of rows in ColumnObject is undefined. 
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 809586d8810..49c953c5aab 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -252,7 +252,7 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnSparse::insertFrom(const IColumn & src, size_t n) #else void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) @@ -454,7 +454,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const #else int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 3e34d1de94a..732fad292af 100644 --- a/src/Columns/ColumnSparse.h +++ 
b/src/Columns/ColumnSparse.h @@ -81,14 +81,14 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override; #else void doInsertFrom(const IColumn & src, size_t n) override; @@ -106,7 +106,7 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 1eda9714d62..37a0d6b31a2 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -133,7 +133,7 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void 
ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 602ffac65e8..157ca9fc9cd 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,7 @@ public: return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override #else void doInsertFrom(const IColumn & src_, size_t n) override @@ -169,7 +169,7 @@ public: } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; @@ -220,7 +220,7 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -250,7 +250,7 @@ public: offsets.push_back(offsets.back() + 1); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 9b822d7f570..50e698f57c2 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -205,7 +205,7 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertFrom(const IColumn & src_, size_t n) #else void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) @@ -222,7 +222,7 @@ void 
ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) #else void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) @@ -326,7 +326,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -482,7 +482,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 38e479791d4..71a47e58401 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -66,7 +66,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else @@ -83,7 +83,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void 
doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; @@ -94,7 +94,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index ec1f8e0a4d5..d6cb75679be 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,7 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; @@ -492,7 +492,7 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ee5de4c2dde..18e9cffd61a 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,7 +595,7 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertFrom(const IColumn & src_, size_t n) #else void 
ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) @@ -604,7 +604,7 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) insertFromImpl(src_, n, nullptr); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) #else void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) @@ -613,7 +613,7 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t insertRangeFromImpl(src_, start, length, nullptr); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) #else void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) @@ -1186,7 +1186,7 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index d91b8e93a7d..2dbe1494823 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,7 +180,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src_, size_t n) override; void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; @@ -223,7 +223,7 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t 
limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; #else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 19d1b800961..d5b7786a702 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -503,7 +503,7 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) #else void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 3a0acf5e312..57c108fff23 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,7 +64,7 @@ public: return data.size(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn & src, size_t n) override #else void doInsertFrom(const IColumn & src, size_t n) override @@ -73,7 +73,7 @@ public: data.push_back(assert_cast(src).getData()[n]); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertManyFrom(const IColumn & src, size_t position, size_t length) override #else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override @@ -150,7 +150,7 @@ public: } /// This method implemented in header because it could be possibly devirtualized. 
-#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override #else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override @@ -240,7 +240,7 @@ public: bool tryInsert(const DB::Field & x) override; -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 552e52cf51c..a189903b617 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,7 @@ String IColumn::dumpStructure() const return res.str(); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void IColumn::insertFrom(const IColumn & src, size_t n) #else void IColumn::doInsertFrom(const IColumn & src, size_t n) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 4b6f34e5aa2..005cb314201 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -179,7 +179,7 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertFrom(const IColumn & src, size_t n); #else void insertFrom(const IColumn & src, size_t n) @@ -191,7 +191,7 @@ public: /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. 
-#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; #else void insertRangeFrom(const IColumn & src, size_t start, size_t length) @@ -202,7 +202,7 @@ public: #endif /// Appends one element from other column with the same type multiple times. -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) @@ -345,7 +345,7 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. */ -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; #else [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const @@ -667,7 +667,7 @@ protected: Sort full_sort, PartialSort partial_sort) const; -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) virtual void doInsertFrom(const IColumn & src, size_t n); virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index c19fb704d9b..db21ec05aae 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -26,7 +26,7 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } #else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } @@ -71,7 +71,7 @@ public: { } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertFrom(const IColumn &, 
size_t) override #else void doInsertFrom(const IColumn &, size_t) override @@ -80,7 +80,7 @@ public: ++s; } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override #else void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 3398452b7ee..1faabe55772 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -85,7 +85,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) void insertRangeFrom(const IColumn &, size_t, size_t) override #else void doInsertRangeFrom(const IColumn &, size_t, size_t) override diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 645f6ed79f3..240099f0ae5 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } -#if !defined(ABORT_ON_LOGICAL_ERROR) +#if !defined(DEBUG_OR_SANITIZER_BUILD) static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) #else static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index 607b583cf31..80c837ed43b 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -38,7 +38,7 @@ namespace std::erase_if(left_subkeys, [&](const String & key) { return ignore_keys->contains(key); }); std::erase_if(right_subkeys, [&](const String & key) { return 
ignore_keys->contains(key); }); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) /// Compound `ignore_keys` are not yet implemented. for (const auto & ignore_key : *ignore_keys) chassert(ignore_key.find('.') == std::string_view::npos); diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 28cfa98666a..ac412684cf7 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -192,7 +192,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused] { /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug). /// Let's find them. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (size < 0) return; diff --git a/src/Common/PageCache.cpp b/src/Common/PageCache.cpp index 56bd8c1a339..d719a387e14 100644 --- a/src/Common/PageCache.cpp +++ b/src/Common/PageCache.cpp @@ -424,7 +424,7 @@ static void logUnexpectedSyscallError(std::string name) { std::string message = fmt::format("{} failed: {}", name, errnoToString()); LOG_WARNING(&Poco::Logger::get("PageCache"), "{}", message); -#if defined(ABORT_ON_LOGICAL_ERROR) +#if defined(DEBUG_OR_SANITIZER_BUILD) volatile bool true_ = true; if (true_) // suppress warning about missing [[noreturn]] abortOnFailedAssertion(message); diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index f9d0bf0e595..7a04372ffad 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -25,7 +25,7 @@ namespace DB template inline To assert_cast(From && from) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD try { if constexpr (std::is_pointer_v) diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index d8c6e9cb99d..9b0c9aeafbe 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -166,7 +166,7 @@ TEST(Common, RWLockRecursive) auto lock2 = 
fifo_lock->getLock(RWLockImpl::Read, "q2"); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD /// It throws LOGICAL_ERROR EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, "q2");}); #endif diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index e9c642666d3..198f6c0ea04 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -59,7 +59,7 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( std::optional read_until_position_, std::shared_ptr cache_log_) : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0, file_size_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("CachedOnDiskReadBufferFromFile({})", cache_key_))) #else , log(getLogger("CachedOnDiskReadBufferFromFile")) @@ -452,7 +452,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segme { case ReadType::CACHED: { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD size_t file_size = getFileSizeFromReadBuffer(*read_buffer_for_file_segment); if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end) throw Exception( @@ -937,7 +937,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (!result) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (read_type == ReadType::CACHED) { size_t cache_file_size = getFileSizeFromReadBuffer(*implementation_buffer); diff --git a/src/IO/tests/gtest_memory_resize.cpp b/src/IO/tests/gtest_memory_resize.cpp index d760a948075..c3b34c352b2 100644 --- a/src/IO/tests/gtest_memory_resize.cpp +++ b/src/IO/tests/gtest_memory_resize.cpp @@ -134,7 +134,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); ASSERT_EQ(memory.m_size, 0x8000000000000000ULL - PADDING_FOR_SIMD); -#ifndef 
ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, 0x8000000000000000ULL - 1); @@ -158,7 +158,7 @@ TEST(MemoryResizeTest, SmallInitAndBigResizeOverflowWhenPadding) ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); ASSERT_EQ(memory.m_size, 1); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW_ERROR_CODE(memory.resize(0x8000000000000000ULL - (PADDING_FOR_SIMD - 1)), Exception, ErrorCodes::LOGICAL_ERROR); ASSERT_TRUE(memory.m_data); // state is intact after exception ASSERT_EQ(memory.m_capacity, PADDING_FOR_SIMD); @@ -197,7 +197,7 @@ TEST(MemoryResizeTest, BigInitAndSmallResizeOverflowWhenPadding) , ErrorCodes::ARGUMENT_OUT_OF_BOUND); } -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD { EXPECT_THROW_ERROR_CODE( { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 3c1af6538ad..b53a8b58023 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -917,8 +917,8 @@ TEST_P(SyncAsync, ExceptionOnUploadPart) { TEST_F(WBS3Test, PrefinalizeCalledMultipleTimes) { -#ifdef ABORT_ON_LOGICAL_ERROR - GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if ABORT_ON_LOGICAL_ERROR is not defined"; +#ifdef DEBUG_OR_SANITIZER_BUILD + GTEST_SKIP() << "this test trigger LOGICAL_ERROR, runs only if DEBUG_OR_SANITIZER_BUILD is not defined"; #else EXPECT_THROW({ try { diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index a3848fa3a75..a88c0de2cfe 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1007,7 +1007,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc() limits_satisfied = main_priority->collectCandidatesForEviction( desired_size, 
desired_elements_num, keep_up_free_space_remove_batch, stat, eviction_candidates, lock); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// Let's make sure that we correctly processed the limits. if (limits_satisfied && eviction_candidates.size() < keep_up_free_space_remove_batch) { @@ -1110,7 +1110,7 @@ void FileCache::removeAllReleasable(const UserID & user_id) { assertInitialized(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif @@ -1226,7 +1226,7 @@ void FileCache::loadMetadataImpl() if (first_exception) std::rethrow_exception(first_exception); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } @@ -1393,7 +1393,7 @@ void FileCache::loadMetadataForKeys(const fs::path & keys_dir) FileCache::~FileCache() { deactivateBackgroundOperations(); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD assertCacheCorrectness(); #endif } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 838ca0b491e..1664a91b694 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -67,7 +67,7 @@ FileSegment::FileSegment( , key_metadata(key_metadata_) , queue_iterator(queue_iterator_) , cache(cache_) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD , log(getLogger(fmt::format("FileSegment({}) : {}", key_.toString(), range().toString()))) #else , log(getLogger("FileSegment")) @@ -385,9 +385,9 @@ void FileSegment::write(char * from, size_t size, size_t offset_in_file) try { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD /// This mutex is only needed to have a valid assertion in assertCacheCorrectness(), - /// which is only executed in debug/sanitizer builds (under ABORT_ON_LOGICAL_ERROR). + /// which is only executed in debug/sanitizer builds (under DEBUG_OR_SANITIZER_BUILD). 
std::lock_guard lock(write_mutex); #endif diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 1d23278a255..7e4b76d3cc6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -963,7 +963,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( } else if (!can_be_broken) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); #else LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 5d237d28089..1b57ad2b622 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -538,7 +538,7 @@ Chunk DDLQueryStatusSource::generate() ExecutionStatus status(-1, "Cannot obtain error message"); /// Replicated database retries in case of error, it should not write error status. 
-#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD bool need_check_status = true; #else bool need_check_status = !is_replicated_database; diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index ce5992c2548..364d7c69071 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -50,7 +50,7 @@ TEST(Processors, PortsNotConnected) processors->emplace_back(std::move(source)); processors->emplace_back(std::move(sink)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD try { QueryStatusPtr element; diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index c8ab2e3a973..34bc2eb2b5e 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -133,7 +133,7 @@ TEST(CheckSortedTransform, CheckBadLastRow) EXPECT_NO_THROW(executor.pull(chunk)); EXPECT_NO_THROW(executor.pull(chunk)); -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -158,7 +158,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock1) Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -181,7 +181,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock2) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } @@ -204,7 +204,7 @@ TEST(CheckSortedTransform, CheckUnsortedBlock3) PullingPipelineExecutor executor(pipeline); Chunk chunk; -#ifndef ABORT_ON_LOGICAL_ERROR +#ifndef DEBUG_OR_SANITIZER_BUILD EXPECT_THROW(executor.pull(chunk), DB::Exception); #endif } diff --git 
a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 8d69df8de76..1306a3869c7 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -666,7 +666,7 @@ void TCPHandler::runImpl() // Server should die on std logic errors in debug, like with assert() // or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in // tests. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD catch (const std::logic_error & e) { state.io.onException(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 29631b95542..41e90aafd42 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -357,7 +357,7 @@ void RefreshTask::refreshTask() stop_requested = true; tryLogCurrentException(log, "Unexpected exception in refresh scheduling, please investigate. The view will be stopped."); -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD abortOnFailedAssertion("Unexpected exception in refresh scheduling"); #endif } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 72f725965e0..3f02486ed15 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1516,7 +1516,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; @@ -2383,7 +2383,7 @@ static void paranoidCheckForCoveredPartsInZooKeeper( const String & covering_part_name, const StorageReplicatedMergeTree & storage) { -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD constexpr bool 
paranoid_check_for_covered_parts_default = true; #else constexpr bool paranoid_check_for_covered_parts_default = false; From 85714e7d7e18a9d91dfe385b658e1ae1ddedb2ea Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 18 Jul 2024 11:31:23 +0000 Subject: [PATCH 25/57] fix asof join on nulls --- .../Transforms/MergeJoinTransform.cpp | 10 ++- .../0_stateless/00976_asof_join_on.reference | 78 ++++++++++++++++++- .../0_stateless/00976_asof_join_on.sql.j2 | 7 +- 3 files changed, 88 insertions(+), 7 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 3b69ddaec06..7bdb3d4f30f 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -257,8 +257,14 @@ JoinKeyRow::JoinKeyRow(const FullMergeJoinCursor & cursor, size_t pos) new_col->insertFrom(*col, pos); row.push_back(std::move(new_col)); } - if (const auto * asof_column = cursor.getAsofColumn()) + if (const IColumn * asof_column = cursor.getAsofColumn()) { + if (const auto * nullable_asof_column = checkAndGetColumn(asof_column)) + { + /// We save matched column, and since NULL do not match anything, we can't use it as a key + chassert(!nullable_asof_column->isNullAt(pos)); + asof_column = nullable_asof_column->getNestedColumnPtr().get(); + } auto new_col = asof_column->cloneEmpty(); new_col->insertFrom(*asof_column, pos); row.push_back(std::move(new_col)); @@ -1174,7 +1180,6 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (!cursors[1]->cursor.isValid() && !cursors[1]->fullyCompleted()) return Status(1); - if (auto result = handleAllJoinState()) return std::move(*result); @@ -1183,7 +1188,6 @@ IMergingAlgorithm::Status MergeJoinAlgorithm::merge() if (cursors[0]->fullyCompleted() || cursors[1]->fullyCompleted()) { - if (!cursors[0]->fullyCompleted() && isLeftOrFull(kind)) return Status(createBlockWithDefaults(0)); diff --git 
a/tests/queries/0_stateless/00976_asof_join_on.reference b/tests/queries/0_stateless/00976_asof_join_on.reference index e13745bed9d..80af4287d3c 100644 --- a/tests/queries/0_stateless/00976_asof_join_on.reference +++ b/tests/queries/0_stateless/00976_asof_join_on.reference @@ -1,4 +1,4 @@ -- default - +- default / join_use_nulls = 0 - 1 1 0 0 1 2 1 2 1 3 1 2 @@ -35,7 +35,7 @@ 2 1 2 3 2 2 2 3 1 2 1 2 -- full_sorting_merge - +- full_sorting_merge / join_use_nulls = 0 - 1 1 0 0 1 2 1 2 1 3 1 2 @@ -72,3 +72,77 @@ 2 1 2 3 2 2 2 3 1 2 1 2 +- default / join_use_nulls = 1 - +1 1 \N \N +1 2 1 2 +1 3 1 2 +2 1 \N \N +2 2 \N \N +2 3 2 3 +3 1 \N \N +3 2 \N \N +3 3 \N \N +9 +1 2 1 2 +1 3 1 2 +2 3 2 3 +- +1 1 1 2 +1 2 1 2 +1 3 1 4 +2 1 2 3 +2 2 2 3 +2 3 2 3 +- +1 1 1 2 +1 2 1 2 +1 3 1 4 +2 1 2 3 +2 2 2 3 +2 3 2 3 +- +1 3 1 2 +- +1 1 1 2 +1 2 1 4 +1 3 1 4 +2 1 2 3 +2 2 2 3 +1 2 1 2 +- full_sorting_merge / join_use_nulls = 1 - +1 1 \N \N +1 2 1 2 +1 3 1 2 +2 1 \N \N +2 2 \N \N +2 3 2 3 +3 1 \N \N +3 2 \N \N +3 3 \N \N +9 +1 2 1 2 +1 3 1 2 +2 3 2 3 +- +1 1 1 2 +1 2 1 2 +1 3 1 4 +2 1 2 3 +2 2 2 3 +2 3 2 3 +- +1 1 1 2 +1 2 1 2 +1 3 1 4 +2 1 2 3 +2 2 2 3 +2 3 2 3 +- +1 3 1 2 +- +1 1 1 2 +1 2 1 4 +1 3 1 4 +2 1 2 3 +2 2 2 3 +1 2 1 2 diff --git a/tests/queries/0_stateless/00976_asof_join_on.sql.j2 b/tests/queries/0_stateless/00976_asof_join_on.sql.j2 index aecf472a36c..ea642366a05 100644 --- a/tests/queries/0_stateless/00976_asof_join_on.sql.j2 +++ b/tests/queries/0_stateless/00976_asof_join_on.sql.j2 @@ -7,11 +7,13 @@ CREATE TABLE B(b UInt32, t UInt32) ENGINE = Memory; INSERT INTO A (a,t) VALUES (1,1),(1,2),(1,3), (2,1),(2,2),(2,3), (3,1),(3,2),(3,3); INSERT INTO B (b,t) VALUES (1,2),(1,4),(2,3); +{% for join_use_nulls in [0, 1] -%} {% for join_algorithm in ['default', 'full_sorting_merge'] -%} SET join_algorithm = '{{ join_algorithm }}'; -SELECT '- {{ join_algorithm }} -'; +SELECT '- {{ join_algorithm }} / join_use_nulls = {{ join_use_nulls }} -'; +set join_use_nulls = {{ join_use_nulls 
}}; SELECT A.a, A.t, B.b, B.t FROM A ASOF LEFT JOIN B ON A.a == B.b AND A.t >= B.t ORDER BY (A.a, A.t); SELECT count() FROM A ASOF LEFT JOIN B ON A.a == B.b AND B.t <= A.t; @@ -34,7 +36,8 @@ ASOF INNER JOIN (SELECT * FROM B UNION ALL SELECT 1, 3) AS B ON B.t <= A.t AND A WHERE B.t != 3 ORDER BY (A.a, A.t) ; -{% endfor %} +{% endfor -%} +{% endfor -%} DROP TABLE A; DROP TABLE B; From ab2522b17020f1fb31b760f0594784cd58468797 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 18 Jul 2024 13:17:34 +0000 Subject: [PATCH 26/57] fix asof join on nulls --- src/Processors/Transforms/MergeJoinTransform.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 7bdb3d4f30f..e96a75d277b 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -301,7 +301,11 @@ bool JoinKeyRow::asofMatch(const FullMergeJoinCursor & cursor, ASOFJoinInequalit if (isNullAt(*asof_row, 0) || isNullAt(*cursor.getAsofColumn(), cursor->getRow())) return false; - int cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1); + int cmp = 0; + if (const auto * nullable_column = checkAndGetColumn(cursor.getAsofColumn())) + cmp = nullable_column->getNestedColumn().compareAt(cursor->getRow(), 0, *asof_row, 1); + else + cmp = cursor.getAsofColumn()->compareAt(cursor->getRow(), 0, *asof_row, 1); return (asof_inequality == ASOFJoinInequality::Less && cmp < 0) || (asof_inequality == ASOFJoinInequality::LessOrEquals && cmp <= 0) From 093b13329c4206b4ecd604fb373d8d60cf345a1f Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 18 Jul 2024 15:46:05 +0000 Subject: [PATCH 27/57] One more check in JOIN ON ... 
IS NULL --- .../Passes/LogicalExpressionOptimizerPass.cpp | 46 +++++++++++++++++-- ...11_join_on_nullsafe_optimization.reference | 4 ++ .../02911_join_on_nullsafe_optimization.sql | 1 + 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 854697bca9f..7e54b5a4b42 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -67,6 +67,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) return nullptr; } +/// Checks if the node is combination of isNull and notEquals functions of two the same arguments +bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs) +{ + QueryTreeNodePtrWithHashSet all_arguments; + for (const auto & node : nodes) + { + const auto * func_node = node->as(); + if (!func_node) + return false; + + const auto & arguments = func_node->getArguments().getNodes(); + if (func_node->getFunctionName() == "isNull" && arguments.size() == 1) + all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2) + { + if (arguments[0]->isEqual(*arguments[1])) + return false; + all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1])); + } + else + return false; + + if (all_arguments.size() > 2) + return false; + } + + if (all_arguments.size() != 2) + return false; + + lhs = all_arguments.begin()->node; + rhs = std::next(all_arguments.begin())->node; + return true; +} + bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) { const auto * constant_node = node->as(); @@ -212,11 +247,14 @@ private: else if (func_name == "and") { const auto & and_arguments = argument_function->getArguments().getNodes(); - bool all_are_is_null = 
and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull"); - if (all_are_is_null) + + QueryTreeNodePtr is_null_lhs_arg; + QueryTreeNodePtr is_null_rhs_arg; + if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg)) { - is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1); - is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); + is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1); + is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1); + continue; } /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 4eb7e74446d..31a1cda18e7 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -35,6 +35,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +\N \N \N \N -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: -- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index f7813e2a1b4..f739259caf9 
100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -34,6 +34,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) A SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: From cdadef78471b47d05d6d1c437a823b17f8867991 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 18 Jul 2024 21:26:33 +0200 Subject: [PATCH 28/57] Add more comments. --- src/Parsers/ASTViewTargets.h | 21 +++++++++++++++++---- src/Parsers/ParserViewTargets.h | 7 ++++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Parsers/ASTViewTargets.h b/src/Parsers/ASTViewTargets.h index 33a7bc5fcb1..12182919f0e 100644 --- a/src/Parsers/ASTViewTargets.h +++ b/src/Parsers/ASTViewTargets.h @@ -9,15 +9,20 @@ namespace DB class ASTStorage; enum class Keyword : size_t; -/// Information about the target table for a materialized view or a window view. +/// Information about target tables (external or inner) of a materialized view or a window view. +/// See ASTViewTargets for more details. struct ViewTarget { enum Kind { - /// Target table for a materialized view or a window view. + /// If `kind == ViewTarget::To` then `ViewTarget` contains information about the "TO" table of a materialized view or a window view: + /// CREATE MATERIALIZED VIEW db.mv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... 
+ /// or + /// CREATE WINDOW VIEW db.wv_name {TO [db.]to_target | ENGINE to_engine} AS SELECT ... To, - /// Table with intermediate results for a window view. + /// If `kind == ViewTarget::Inner` then `ViewTarget` contains information about the "INNER" table of a window view: + /// CREATE WINDOW VIEW db.wv_name {INNER ENGINE inner_engine} AS SELECT ... Inner, }; @@ -42,7 +47,15 @@ std::string_view toString(ViewTarget::Kind kind); void parseFromString(ViewTarget::Kind & out, std::string_view str); -/// Information about all the target tables for a view. +/// Information about all target tables (external or inner) of a view. +/// +/// For example, for a materialized view: +/// CREATE MATERIALIZED VIEW db.mv_name [TO [db.]to_target | ENGINE to_engine] AS SELECT ... +/// this class contains information about the "TO" table: its name and database (if it's external), its UUID and engine (if it's inner). +/// +/// For a window view: +/// CREATE WINDOW VIEW db.wv_name [TO [db.]to_target | ENGINE to_engine] [INNER ENGINE inner_engine] AS SELECT ... +/// this class contains information about both the "TO" table and the "INNER" table. class ASTViewTargets : public IAST { public: diff --git a/src/Parsers/ParserViewTargets.h b/src/Parsers/ParserViewTargets.h index f5d1850e974..3af3c0b8df3 100644 --- a/src/Parsers/ParserViewTargets.h +++ b/src/Parsers/ParserViewTargets.h @@ -7,7 +7,12 @@ namespace DB { -/// Parses information about target views of a table. +/// Parses information about target tables (external or inner) of a materialized view or a window view. +/// The function parses one or multiple parts of a CREATE query looking like this: +/// TO db.table_name +/// TO INNER UUID 'XXX' +/// {ENGINE / INNER ENGINE} TableEngine(arguments) [ORDER BY ...] [SETTINGS ...] +/// Returns ASTViewTargets if succeeded. 
class ParserViewTargets : public IParserBase { public: From d7250c1d63c561c14f41f5f1a18f79fe0efc5972 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 18 Jul 2024 21:27:04 +0200 Subject: [PATCH 29/57] Add function ASTCreateQuery::hasInnerUUIDs(). --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Parsers/ASTCreateQuery.cpp | 7 +++++++ src/Parsers/ASTCreateQuery.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 45e2881ae5c..faa91341a7c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1131,7 +1131,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data } else { - bool has_uuid = (create.uuid != UUIDHelpers::Nil) || (create.targets && create.targets->hasInnerUUIDs()); + bool has_uuid = (create.uuid != UUIDHelpers::Nil) || create.hasInnerUUIDs(); if (has_uuid && !is_on_cluster && !internal) { /// We don't show the following error message either diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 770a63c6e75..348b54203fc 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -563,6 +563,13 @@ UUID ASTCreateQuery::getTargetInnerUUID(ViewTarget::Kind target_kind) const return UUIDHelpers::Nil; } +bool ASTCreateQuery::hasInnerUUIDs() const +{ + if (targets) + return targets->hasInnerUUIDs(); + return false; +} + std::shared_ptr ASTCreateQuery::getTargetInnerEngine(ViewTarget::Kind target_kind) const { if (targets) diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f751a09169c..08d26f28efa 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -167,6 +167,7 @@ public: StorageID getTargetTableID(ViewTarget::Kind target_kind) const; bool hasTargetTableID(ViewTarget::Kind target_kind) const; UUID getTargetInnerUUID(ViewTarget::Kind target_kind) 
const; + bool hasInnerUUIDs() const; std::shared_ptr getTargetInnerEngine(ViewTarget::Kind target_kind) const; void setTargetInnerEngine(ViewTarget::Kind target_kind, ASTPtr storage_def); From f19de4effdf1e9d15acab69cecd882d7fd9b156b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jul 2024 03:38:05 +0200 Subject: [PATCH 30/57] Fix bad tests long_select_and_alter, CC @alesapin --- .../0_stateless/01338_long_select_and_alter.reference | 2 +- tests/queries/0_stateless/01338_long_select_and_alter.sh | 6 +++--- .../01338_long_select_and_alter_zookeeper.reference | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.reference b/tests/queries/0_stateless/01338_long_select_and_alter.reference index c2678e7052e..921730b17ce 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter.reference @@ -1,3 +1,3 @@ -10 5 +10 CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 2b0709162a3..2659e5c16cf 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -13,15 +13,15 @@ $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & -# to be sure that select took all required locks +# To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). 
sleep 2 $CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" -$CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" - wait +$CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE alter_mt" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference index b4ed8efab63..9c5ad0fa468 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference @@ -1,3 +1,3 @@ -10 5 +10 CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 41e0a12f369..6eb795408f4 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -13,15 +13,15 @@ $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & -# to be sure that select took all required locks +# To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). 
sleep 2 $CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" -$CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" - wait +$CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE alter_mt" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" From 064bd643dad9153e7c35b6a235585a962d51df2d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 19 Jul 2024 12:23:56 +0000 Subject: [PATCH 31/57] Cosmetics --- src/Functions/dateDiff.cpp | 196 +++++++++++++++---------------------- 1 file changed, 79 insertions(+), 117 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 8e8865db7ed..5c46ad40daa 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -45,26 +45,26 @@ public: template void dispatchForColumns( - const IColumn & x, const IColumn & y, + const IColumn & col_x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * x_vec_16 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); - else if (const auto * x_vec_64 = checkAndGetColumn(&x)) - dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); - else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) - 
dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); - else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) - dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); + if (const auto * x_vec_16 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_16, col_y, timezone_x, timezone_y, result); + else if (const auto * x_vec_32 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32, col_y, timezone_x, timezone_y, result); + else if (const auto * x_vec_32_s = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_32_s, col_y, timezone_x, timezone_y, result); + else if (const auto * x_vec_64 = checkAndGetColumn(&col_x)) + dispatchForSecondColumn(*x_vec_64, col_y, timezone_x, timezone_y, result); + else if (const auto * x_const_16 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_16->getValue(), col_y, timezone_x, timezone_y, result); + else if (const auto * x_const_32 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32->getValue(), col_y, timezone_x, timezone_y, result); + else if (const auto * x_const_32_s = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_32_s->getValue(), col_y, timezone_x, timezone_y, result); + else if (const auto * x_const_64 = checkAndGetColumnConst(&col_x)) + dispatchConstForSecondColumn(x_const_64->getValue>(), col_y, timezone_x, timezone_y, result); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", @@ -73,25 +73,25 @@ public: template void dispatchForSecondColumn( - const LeftColumnType & x, const IColumn & y, + const LeftColumnType & x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) 
vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); - else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_16 = checkAndGetColumnConst(&col_y)) vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32 = checkAndGetColumnConst(&col_y)) vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) + else if (const auto * y_const_32_s = checkAndGetColumnConst(&col_y)) vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); - else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) + else if (const auto * y_const_64 = checkAndGetColumnConst(&col_y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, @@ -101,17 +101,17 @@ public: template void dispatchConstForSecondColumn( - T1 x, const IColumn & y, + T1 x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, ColumnInt64::Container & result) const { - if (const auto * y_vec_16 = checkAndGetColumn(&y)) + if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) constantVector(x, *y_vec_16, timezone_x, timezone_y, result); - else if (const auto * y_vec_32 = checkAndGetColumn(&y)) + else if (const auto * y_vec_32 = 
checkAndGetColumn(&col_y)) constantVector(x, *y_vec_32, timezone_x, timezone_y, result); - else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); - else if (const auto * y_vec_64 = checkAndGetColumn(&y)) + else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) constantVector(x, *y_vec_64, timezone_x, timezone_y, result); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, @@ -168,8 +168,7 @@ public: Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { if constexpr (is_diff) - return static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + return static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); else { auto res = static_cast(transform_y.execute(y, timezone_y)) @@ -332,95 +331,73 @@ public: static constexpr auto name = is_relative ? 
"dateDiff" : "age"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 3 && arguments.size() != 4) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", - getName(), arguments.size()); + FunctionArgumentDescriptors mandatory_args{ + {"unit", static_cast(&isString), nullptr, "String"}, + {"startdate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"enddate", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} (unit) must be String", - getName()); + FunctionArgumentDescriptors optional_args{ + {"timezone", static_cast(&isString), nullptr, "String"}, + }; - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); - - if (arguments.size() == 4 && !isString(arguments[3])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Fourth argument for function {} (timezone) must be String", - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 3}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * unit_column = checkAndGetColumnConst(arguments[0].column.get()); - if (!unit_column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "First argument for function {} must be constant String", - getName()); + const auto * col_unit = checkAndGetColumnConst(arguments[0].column.get()); + if (!col_unit) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant String", getName()); - String unit = Poco::toLower(unit_column->getValue()); + String unit = Poco::toLower(col_unit->getValue()); - const IColumn & x = *arguments[1].column; - const IColumn & y = *arguments[2].column; + const IColumn & col_x = *arguments[1].column; + const IColumn & col_y = *arguments[2].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = ColumnInt64::create(input_rows_count); const auto & timezone_x = extractTimeZoneFromFunctionArguments(arguments, 3, 1); const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, 
col_res->getData()); else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "week" || unit == "weeks" || unit == "wk" || unit == "ww") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template 
dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} does not support '{}' unit", getName(), unit); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); - return res; + return col_res; } private: DateDiffImpl impl{name}; @@ -437,50 +414,35 @@ public: static constexpr auto name = "timeDiff"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } - + String getName() const override { return name; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } bool isVariadic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 2; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2", - getName(), arguments.size()); + FunctionArgumentDescriptors args{ + {"first_datetime", 
static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + {"second_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date[32] or DateTime[64]"}, + }; - if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument for function {} must be Date, Date32, DateTime or DateTime64", - getName()); - - if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument for function {} must be Date, Date32, DateTime or DateTime64", - getName() - ); + validateFunctionArguments(*this, arguments, args); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const IColumn & x = *arguments[0].column; - const IColumn & y = *arguments[1].column; + const IColumn & col_x = *arguments[0].column; + const IColumn & col_y = *arguments[1].column; - size_t rows = input_rows_count; - auto res = ColumnInt64::create(rows); + auto col_res = ColumnInt64::create(input_rows_count); - impl.dispatchForColumns>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData()); + impl.dispatchForColumns>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), col_res->getData()); - return res; + return col_res; } private: DateDiffImpl impl{name}; From a54a0614a464d686aff48aab583daa527f74e932 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 19 Jul 2024 12:27:18 +0000 Subject: [PATCH 32/57] Iterate over input_rows_count instead of a size of a random input column --- src/Functions/dateDiff.cpp | 99 ++++++++++++++++++++------------------ 1 
file changed, 51 insertions(+), 48 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 5c46ad40daa..a39cbae4e30 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -47,82 +47,80 @@ public: void dispatchForColumns( const IColumn & col_x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { if (const auto * x_vec_16 = checkAndGetColumn(&col_x)) - dispatchForSecondColumn(*x_vec_16, col_y, timezone_x, timezone_y, result); + dispatchForSecondColumn(*x_vec_16, col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_vec_32 = checkAndGetColumn(&col_x)) - dispatchForSecondColumn(*x_vec_32, col_y, timezone_x, timezone_y, result); + dispatchForSecondColumn(*x_vec_32, col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_vec_32_s = checkAndGetColumn(&col_x)) - dispatchForSecondColumn(*x_vec_32_s, col_y, timezone_x, timezone_y, result); + dispatchForSecondColumn(*x_vec_32_s, col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_vec_64 = checkAndGetColumn(&col_x)) - dispatchForSecondColumn(*x_vec_64, col_y, timezone_x, timezone_y, result); + dispatchForSecondColumn(*x_vec_64, col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_const_16 = checkAndGetColumnConst(&col_x)) - dispatchConstForSecondColumn(x_const_16->getValue(), col_y, timezone_x, timezone_y, result); + dispatchConstForSecondColumn(x_const_16->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_const_32 = checkAndGetColumnConst(&col_x)) - dispatchConstForSecondColumn(x_const_32->getValue(), col_y, timezone_x, timezone_y, result); + dispatchConstForSecondColumn(x_const_32->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_const_32_s = checkAndGetColumnConst(&col_x)) 
- dispatchConstForSecondColumn(x_const_32_s->getValue(), col_y, timezone_x, timezone_y, result); + dispatchConstForSecondColumn(x_const_32_s->getValue(), col_y, timezone_x, timezone_y, input_rows_count, result); else if (const auto * x_const_64 = checkAndGetColumnConst(&col_x)) - dispatchConstForSecondColumn(x_const_64->getValue>(), col_y, timezone_x, timezone_y, result); + dispatchConstForSecondColumn(x_const_64->getValue>(), col_y, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchForSecondColumn( const LeftColumnType & x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) - vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); + vectorVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) - vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); + vectorVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) - vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); + vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) - vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); + vectorVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_const_16 = checkAndGetColumnConst(&col_y)) - vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); + vectorConstant(x, 
y_const_16->getValue(), timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_const_32 = checkAndGetColumnConst(&col_y)) - vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); + vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_const_32_s = checkAndGetColumnConst(&col_y)) - vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); + vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_const_64 = checkAndGetColumnConst(&col_y)) - vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); + vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void dispatchConstForSecondColumn( T1 x, const IColumn & col_y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { if (const auto * y_vec_16 = checkAndGetColumn(&col_y)) - constantVector(x, *y_vec_16, timezone_x, timezone_y, result); + constantVector(x, *y_vec_16, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_vec_32 = checkAndGetColumn(&col_y)) - constantVector(x, *y_vec_32, timezone_x, timezone_y, result); + constantVector(x, *y_vec_32, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_vec_32_s = checkAndGetColumn(&col_y)) - constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); + constantVector(x, *y_vec_32_s, timezone_x, timezone_y, input_rows_count, result); else if (const auto * y_vec_64 = checkAndGetColumn(&col_y)) 
- constantVector(x, *y_vec_64, timezone_x, timezone_y, result); + constantVector(x, *y_vec_64, timezone_x, timezone_y, input_rows_count, result); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", - name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", name); } template void vectorVector( const LeftColumnType & x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -130,14 +128,15 @@ public: const auto transform_x = TransformDateTime64(getScale(x)); const auto transform_y = TransformDateTime64(getScale(y)); - for (size_t i = 0, size = x.size(); i < size; ++i) - result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); + for (size_t i = 0; i < input_rows_count; ++i) + result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y); } template void vectorConstant( const LeftColumnType & x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & x_data = x.getData(); @@ -145,7 +144,7 @@ public: const auto transform_y = TransformDateTime64(getScale(y)); const auto y_value = stripDecimalFieldValue(y); - for (size_t i = 0, size = x.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y); } @@ -153,6 +152,7 @@ public: void constantVector( T1 x, const RightColumnType & y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y, + size_t input_rows_count, ColumnInt64::Container & result) const { const auto & y_data = y.getData(); @@ -160,19 +160,22 @@ public: const 
auto transform_y = TransformDateTime64(getScale(y)); const auto x_value = stripDecimalFieldValue(x); - for (size_t i = 0, size = y.size(); i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y); } template Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { + auto res = static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + if constexpr (is_diff) - return static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + { + return res; + } else { - auto res = static_cast(transform_y.execute(y, timezone_y)) - - static_cast(transform_x.execute(x, timezone_x)); + /// Adjust res: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; @@ -373,27 +376,27 @@ public: const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "years" || unit == "yy" || unit == "yyyy") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "quarter" || unit == "quarters" || unit == "qq" || unit == "q") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "month" || unit == "months" || unit == "mm" || unit == "m") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "week" || unit == "weeks" || 
unit == "wk" || unit == "ww") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "day" || unit == "days" || unit == "dd" || unit == "d") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "hour" || unit == "hours" || unit == "hh" || unit == "h") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "minute" || unit == "minutes" || unit == "mi" || unit == "n") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "second" || unit == "seconds" || unit == "ss" || unit == "s") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "millisecond" || unit == "milliseconds" || unit == "ms") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else if (unit == "nanosecond" || unit == 
"nanoseconds" || unit == "ns") - impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, col_res->getData()); + impl.template dispatchForColumns>(col_x, col_y, timezone_x, timezone_y, input_rows_count, col_res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); @@ -440,7 +443,7 @@ public: auto col_res = ColumnInt64::create(input_rows_count); - impl.dispatchForColumns>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), col_res->getData()); + impl.dispatchForColumns>(col_x, col_y, DateLUT::instance(), DateLUT::instance(), input_rows_count, col_res->getData()); return col_res; } From 0cab22fd16caf260306e1b1feb77d2ddcced5205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 19 Jul 2024 16:44:56 +0200 Subject: [PATCH 33/57] Attempt to fix flakiness of 01194_http_query_id --- tests/queries/0_stateless/01194_http_query_id.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01194_http_query_id.sh b/tests/queries/0_stateless/01194_http_query_id.sh index 5aebdc10dfc..fac17cca3c6 100755 --- a/tests/queries/0_stateless/01194_http_query_id.sh +++ b/tests/queries/0_stateless/01194_http_query_id.sh @@ -4,14 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -url="http://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?session_id=test_01194" rnd=$RANDOM +url="${CLICKHOUSE_URL}&session_id=test_01194_$RANDOM" ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,1" > /dev/null ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,2" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,3" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,4" > /dev/null -${CLICKHOUSE_CURL} -sS "$url" --data "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CURL} -sS "$url&query=SELECT+count(DISTINCT+query_id)+FROM+system.query_log+WHERE+current_database+LIKE+currentDatabase()+AND+query+LIKE+'SELECT+''test_01194'',$rnd%25'" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data "SELECT count(DISTINCT query_id) FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE 'SELECT ''test_01194'',$rnd%'" From a11e89f4f70b101553a138d367d7f6dcd8318554 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 19 Jul 2024 19:00:45 +0100 Subject: [PATCH 34/57] impl --- base/base/defines.h | 6 +----- src/Common/Exception.cpp | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/base/base/defines.h b/base/base/defines.h index cf3d357da18..5685a6d9833 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -96,10 +96,6 @@ # endif #endif -#if !defined(ABORT_ON_LOGICAL_ERROR) && defined(DEBUG_OR_SANITIZER_BUILD) -# define ABORT_ON_LOGICAL_ERROR -#endif - /// chassert(x) is similar to assert(x), but: /// - works in builds with sanitizers, not only in debug builds /// - tries to print failed assertion into server log @@ -108,7 +104,7 @@ /// Also it makes sense to call abort() instead of __builtin_unreachable() in debug builds, /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) - #if 
defined(ABORT_ON_LOGICAL_ERROR) +# if defined(DEBUG_OR_SANITIZER_BUILD) // clang-format off #include namespace DB diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 111280074dd..33befa64946 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -64,7 +64,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc { // In debug builds and builds with sanitizers, treat LOGICAL_ERROR as an assertion failure. // Log the message before we fail. -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD if (code == ErrorCodes::LOGICAL_ERROR) { abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); @@ -443,7 +443,7 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b } catch (...) {} // NOLINT(bugprone-empty-catch) -#ifdef ABORT_ON_LOGICAL_ERROR +#ifdef DEBUG_OR_SANITIZER_BUILD try { throw; From c948103dee50bf5bddeff9af485d4df2acc8b0f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 19 Jul 2024 22:23:50 +0200 Subject: [PATCH 35/57] Better tests --- .../0_stateless/01338_long_select_and_alter.reference | 2 +- tests/queries/0_stateless/01338_long_select_and_alter.sh | 6 +++--- .../01338_long_select_and_alter_zookeeper.reference | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.reference b/tests/queries/0_stateless/01338_long_select_and_alter.reference index 921730b17ce..276d6bcc29d 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter.reference @@ -1,3 +1,3 @@ 5 -10 +5 CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 
2659e5c16cf..08609546ff5 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=MergeTree() ORDER BY key" -$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" +$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & @@ -18,10 +18,10 @@ sleep 2 $CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" -wait - $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" +wait + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE alter_mt" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference index 9c5ad0fa468..aab1b93f6bd 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference @@ -1,3 +1,3 @@ 5 -10 +5 CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 6eb795408f4..def6d2ab127 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT 
--query "DROP TABLE IF EXISTS alter_mt" $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" -$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" +$CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & @@ -18,10 +18,10 @@ sleep 2 $CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" -wait - $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" +wait + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE alter_mt" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" From 8a67713e63bbaec8bf820bd6813affefe305d2cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 01:09:45 +0200 Subject: [PATCH 36/57] Fix error --- tests/queries/0_stateless/01338_long_select_and_alter.sh | 4 ++-- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 08609546ff5..5d2759ac884 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" -$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=MergeTree() ORDER BY key" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=MergeTree() ORDER BY key" $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" @@ -16,7 
+16,7 @@ $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query # To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). sleep 2 -$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" +$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64" $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index def6d2ab127..593a96a7cc8 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS alter_mt" -$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" +$CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key Int64, value String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_mt', '1') ORDER BY key" $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number - 1 AS x, toString(x) FROM numbers(5)" @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query # To be sure that select took all required locks for better test sensitivity, although it isn't guaranteed (then the test will also succeed). 
sleep 2 -$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value UInt64" +$CLICKHOUSE_CLIENT --query "ALTER TABLE alter_mt MODIFY COLUMN value Int64" $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM alter_mt" From c786e6eb49152d7992ee3ee5c76300b7e4712309 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Jul 2024 21:27:35 +0200 Subject: [PATCH 37/57] Fix tests --- tests/queries/0_stateless/01338_long_select_and_alter.reference | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.reference | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.reference b/tests/queries/0_stateless/01338_long_select_and_alter.reference index 276d6bcc29d..027109252e1 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter.reference @@ -1,3 +1,3 @@ 5 5 -CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_mt\n(\n `key` Int64,\n `value` Int64\n)\nENGINE = MergeTree\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference index aab1b93f6bd..65e638bc3a4 100644 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.reference @@ -1,3 +1,3 @@ 5 5 -CREATE TABLE default.alter_mt\n(\n `key` UInt64,\n `value` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_mt\n(\n `key` Int64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01338_long_select_and_alter_zookeeper_default/alter_mt\', 
\'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 From 5ff125e37ef35ce5b451e54482e92d7663e25bdb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 10:32:54 +0200 Subject: [PATCH 38/57] Miscellaneous --- src/Common/CurrentMetrics.cpp | 8 ++++---- src/Databases/DatabaseLazy.cpp | 4 ++-- src/Databases/DatabasesCommon.cpp | 8 ++++---- src/Parsers/ExpressionListParsers.cpp | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 7c97e73f278..1011ab12d15 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -234,10 +234,10 @@ M(PartsCommitted, "Deprecated. See PartsActive.") \ M(PartsPreActive, "The part is in data_parts, but not used for SELECTs.") \ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ - M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ - M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ - M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ - M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ + M(AttachedDatabase, "Active databases.") \ + M(AttachedTable, "Active tables.") \ + M(AttachedView, "Active views.") \ + M(AttachedDictionary, "Active dictionaries.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 5017c9b25cb..ca30ee6db15 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -195,7 +195,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n 
snapshot_detached_tables.erase(table_name); } - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::add(CurrentMetrics::AttachedTable); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -221,7 +221,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta .metadata_path = getObjectMetadataPath(table_name), .is_permanently = false}); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::sub(CurrentMetrics::AttachedTable); } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fe0baf30e57..6ccaf811764 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -289,8 +289,8 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n tables.erase(it); table_storage->is_detached = true; - if (table_storage->isSystemStorage() == false) - CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1); + if (!table_storage->isSystemStorage() && database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::sub(getAttachedCounterForStorage(table_storage)); auto table_id = table_storage->getStorageID(); if (table_id.hasUUID()) @@ -334,8 +334,8 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + if (!table->isSystemStorage() && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) + CurrentMetrics::add(getAttachedCounterForStorage(table)); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index fff8383e7b3..f97c042e91e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2743,7 +2743,7 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator if (op.function_name == "and" && layers.back()->between_counter) { - layers.back()->between_counter--; + --layers.back()->between_counter; op = finish_between_operator; } From 7f03b189667161de61e33523586a594694b02071 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 10:49:53 +0200 Subject: [PATCH 39/57] Fix two terrible bugs --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ea10ad59db4..8c1a3cb4dff 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1606,7 +1606,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, - "Too many tables in the Clickhouse. " + "Too many tables. 
" "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", table_num_limit, table_count); } From a6f1c46448a17f294cc6047dfc3fef3802b40abf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 11:08:38 +0200 Subject: [PATCH 40/57] Miscellaneous --- src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++---- .../test_table_db_num_limit/test.py | 22 +++++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 8c1a3cb4dff..2a4a02597fe 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -82,13 +81,13 @@ #include #include -#include #include #include #include #include + namespace CurrentMetrics { extern const Metric AttachedTable; @@ -166,8 +165,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (db_count >= db_num_limit) throw Exception(ErrorCodes::TOO_MANY_DATABASES, - "Too many databases in the Clickhouse. " - "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", + "Too many databases. " + "The limit (server configuration parameter `max_database_num_to_throw`) is set to {}, the current number of databases is {}", db_num_limit, db_count); } @@ -1607,7 +1606,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, "Too many tables. 
" - "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", + "The limit (server configuration parameter `max_table_num_to_throw`) is set to {}, the current number of tables is {}", table_num_limit, table_count); } diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index aa8030b077c..56403d165b2 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -4,8 +4,8 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["config/config.xml"], with_zookeeper=True +node = cluster.add_instance( + "node", main_configs=["config/config.xml"] ) @@ -22,22 +22,26 @@ def started_cluster(): def test_table_db_limit(started_cluster): for i in range(10): - node1.query("create database db{}".format(i)) + node.query("create database db{}".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create database db_exp".format(i)) + node.query("create database db_exp".format(i)) assert "TOO_MANY_DATABASES" in str(exp_info) for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) + + # This checks that system tables are not accounted in the number of tables. 
+ node.query("system flush logs") - node1.query("system flush logs") for i in range(10): - node1.query("drop table t{}".format(i)) + node.query("drop table t{}".format(i)) + for i in range(10): - node1.query("create table t{} (a Int32) Engine = Log".format(i)) + node.query("create table t{} (a Int32) Engine = Log".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: - node1.query("create table default.tx (a Int32) Engine = Log") + node.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) From efd5ff9f4c63c82d77cfda43af786ad3b7a4b8e0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 21 Jul 2024 09:15:13 +0000 Subject: [PATCH 41/57] Automatic style fix --- tests/integration/test_table_db_num_limit/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index 56403d165b2..a7bb04905f2 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -4,9 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", main_configs=["config/config.xml"] -) +node = cluster.add_instance("node", main_configs=["config/config.xml"]) @pytest.fixture(scope="module") From 7ed9ab2338f7de5a2495aad267cc688799c5ff18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 11:29:12 +0200 Subject: [PATCH 42/57] Remove wrong logic from InterpreterCreateQuery --- src/Interpreters/InterpreterCreateQuery.cpp | 12 ++++++------ tests/integration/test_table_db_num_limit/test.py | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 2a4a02597fe..7e0b6eb4193 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ 
b/src/Interpreters/InterpreterCreateQuery.cpp @@ -146,21 +146,21 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) } auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; - if (db_num_limit > 0) + if (db_num_limit > 0 && !internal) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); - std::vector system_databases = { + std::initializer_list system_databases = + { DatabaseCatalog::TEMPORARY_DATABASE, DatabaseCatalog::SYSTEM_DATABASE, DatabaseCatalog::INFORMATION_SCHEMA, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, - DatabaseCatalog::DEFAULT_DATABASE }; for (const auto & system_database : system_databases) { - if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) - db_count--; + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(std::string(system_database))) + --db_count; } if (db_count >= db_num_limit) @@ -1600,7 +1600,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; - if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) + if (table_num_limit > 0 && !internal) { UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); if (table_count >= table_num_limit) diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index 56403d165b2..aa6404c385c 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -21,7 +21,8 @@ def started_cluster(): def test_table_db_limit(started_cluster): - for i in range(10): + # By the way, default database already exists. 
+ for i in range(9): node.query("create database db{}".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: From 19f8e1768f5095618c4e0cded2c8299968aaf0fb Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 20 Jul 2024 16:36:19 +0200 Subject: [PATCH 43/57] Add const to cycles. --- src/Databases/DatabaseReplicated.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Parsers/ASTViewTargets.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 25d1ad90a3c..4c079ae5300 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -734,7 +734,7 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ if (create->targets) { - for (auto inner_table_engine : create->targets->getInnerEngines()) + for (const auto & inner_table_engine : create->targets->getInnerEngines()) checkTableEngine(*create, *inner_table_engine, query_context); } } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index faa91341a7c..342374aa580 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1384,7 +1384,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (create.targets) { - for (auto inner_table_engine : create.targets->getInnerEngines()) + for (const auto & inner_table_engine : create.targets->getInnerEngines()) { if (isReplicated(*inner_table_engine)) is_storage_replicated = true; diff --git a/src/Parsers/ASTViewTargets.cpp b/src/Parsers/ASTViewTargets.cpp index 38f103b6e55..8ee98e704df 100644 --- a/src/Parsers/ASTViewTargets.cpp +++ b/src/Parsers/ASTViewTargets.cpp @@ -43,7 +43,7 @@ std::vector ASTViewTargets::getKinds() const { std::vector kinds; kinds.reserve(targets.size()); - for (auto & target : targets) + for (const auto & target : targets) 
kinds.push_back(target.kind); return kinds; } @@ -121,7 +121,7 @@ void ASTViewTargets::resetInnerUUIDs() bool ASTViewTargets::hasInnerUUIDs() const { - for (auto & target : targets) + for (const auto & target : targets) { if (target.inner_uuid != UUIDHelpers::Nil) return true; From 0c2c027af63fcbababffbe3a39ed2631884e1938 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Jul 2024 12:30:20 +0200 Subject: [PATCH 44/57] Remove bad tests @azat --- ...2_part_log_rmt_fetch_merge_error.reference | 10 ----- .../03002_part_log_rmt_fetch_merge_error.sql | 35 ---------------- ..._part_log_rmt_fetch_mutate_error.reference | 10 ----- .../03002_part_log_rmt_fetch_mutate_error.sql | 41 ------------------- 4 files changed, 96 deletions(-) delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference delete mode 100644 tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference deleted file mode 100644 index b19d389d8d0..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 1 -after -rmt_master NewPart 0 1 -rmt_master MergeParts 0 1 -rmt_slave MergeParts 1 0 -rmt_slave DownloadPart 0 2 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql deleted file mode 100644 index 548a8e5570a..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sql +++ /dev/null @@ -1,35 +0,0 @@ --- Tags: no-replicated-database, 
no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by key settings always_fetch_merged_part=0; --- always_fetch_merged_part=1, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by key settings always_fetch_merged_part=1; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -optimize table rmt_master final settings alter_sync=1, optimize_throw_if_noop=1; - -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; --- sleep few seconds to try rmt_slave to fetch the part and reflect this error --- in system.part_log -select sleep(3) format Null; -system sync replica rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference deleted file mode 100644 index aac9e7527d1..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference +++ /dev/null @@ -1,10 +0,0 @@ -before -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 1 -rmt_slave MutatePart 1 0 -after -rmt_master NewPart 0 1 -rmt_master MutatePart 0 1 -rmt_slave DownloadPart 0 2 -rmt_slave MutatePart 1 0 diff --git 
a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql deleted file mode 100644 index d8b5ebb3148..00000000000 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sql +++ /dev/null @@ -1,41 +0,0 @@ --- Tags: no-replicated-database, no-parallel, no-shared-merge-tree --- SMT: The merge process is completely different from RMT - -drop table if exists rmt_master; -drop table if exists rmt_slave; - -create table rmt_master (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'master') order by tuple() settings always_fetch_merged_part=0, old_parts_lifetime=600; --- prefer_fetch_merged_part_*_threshold=0, consider this table as a "slave" -create table rmt_slave (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', 'slave') order by tuple() settings prefer_fetch_merged_part_time_threshold=0, prefer_fetch_merged_part_size_threshold=0, old_parts_lifetime=600; - -insert into rmt_master values (1); - -system sync replica rmt_master; -system sync replica rmt_slave; -system stop replicated sends rmt_master; -system stop pulling replication log rmt_slave; -alter table rmt_master update key=key+100 where 1 settings alter_sync=1; - --- first we need to make the rmt_master execute mutation so that it will have --- the part, and rmt_slave will consider it instead of performing mutation on --- it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored -select sleep(3) format Null; -system start pulling replication log rmt_slave; --- and sleep few more seconds to try rmt_slave to fetch the part and reflect --- this error in system.part_log -select sleep(3) format Null; - -system flush logs; -select 'before'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -system start replicated sends rmt_master; -select sleep(3) format Null; -system sync replica 
rmt_slave; - -system flush logs; -select 'after'; -select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; - -drop table rmt_master; -drop table rmt_slave; From 4b8b9b1503800398ccbf68e6a6134838fd67dac5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 21 Jul 2024 16:30:59 +0000 Subject: [PATCH 45/57] Fix UB in function "age" --- src/Functions/DateTimeTransforms.h | 5 ++++- tests/queries/0_stateless/02477_age_datetime64.reference | 5 +++++ tests/queries/0_stateless/02477_age_datetime64.sql | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 34c59ecab08..5f745f3ccad 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1954,7 +1954,10 @@ struct ToRelativeSubsecondNumImpl return t.value; if (scale > scale_multiplier) return t.value / (scale / scale_multiplier); - return t.value * (scale_multiplier / scale); + return static_cast(t.value) * static_cast((scale_multiplier / scale)); + /// Casting ^^: All integers are Int64, yet if t.value is big enough the multiplication can still + /// overflow which is UB. This place is too low-level and generic to check if t.value is sane. + /// Therefore just let it overflow safely and don't bother further. 
} static Int64 execute(UInt32 t, const DateLUTImpl &) { diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference index 3b4459dd26d..fb085f461c9 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -111,3 +111,8 @@ SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), ma 1 SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); 1 +-- UBsan bug #66638 +set session_timezone = 'UTC'; +SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18')) + +-8973935999 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql index 1bed93991ca..b5fa4da8837 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -75,3 +75,7 @@ SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); + +-- UBsan bug #66638 +set session_timezone = 'UTC'; +SELECT age('second', toDateTime(1157339245694594829, 6, 'UTC'), toDate('2015-08-18')) From dd9fe61d1a973a7fa528259b507218cf264548fe Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 21 Jul 2024 17:44:32 +0200 Subject: [PATCH 46/57] CI: New Release workflow updates and fixes --- .github/actions/release/action.yml | 27 +++++------- tests/ci/artifactory.py | 56 ++++++++++++++----------- tests/ci/ci_utils.py | 3 +- tests/ci/create_release.py | 66 
+++++++++++++++++++----------- 4 files changed, 87 insertions(+), 65 deletions(-) diff --git a/.github/actions/release/action.yml b/.github/actions/release/action.yml index 99ec02662f6..c3897682a33 100644 --- a/.github/actions/release/action.yml +++ b/.github/actions/release/action.yml @@ -62,8 +62,8 @@ runs: if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "update ChangeLog" - [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 + git checkout master + python3 ./tests/ci/create_release.py --set-progress-started --progress "update changelog, docker version, security" echo "List versions" ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv echo "Update docker version" @@ -96,17 +96,13 @@ runs: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} ### Changelog category (leave one): - Not for changelog (changelog entry is not required) - - name: Reset changes if Dry-run - if: ${{ inputs.dry-run }} + - name: Complete previous steps and Restore git state + if: ${{ inputs.type == 'patch' }} shell: bash run: | - git reset --hard HEAD - - name: Checkout back to GITHUB_REF - shell: bash - run: | - git checkout "$GITHUB_REF_NAME" - # set current progress to OK python3 ./tests/ci/create_release.py --set-progress-completed + git reset --hard HEAD + git checkout "$GITHUB_REF_NAME" - name: Create GH Release shell: bash if: ${{ inputs.type == 'patch' }} @@ -146,24 +142,23 @@ runs: if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "docker server release" cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker server release" export CHECK_NAME="Docker server image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} - python3 ./tests/ci/create_release.py --set-progress-completed + python3 ./create_release.py --set-progress-completed - name: Docker clickhouse/clickhouse-keeper building if: ${{ inputs.type == 'patch' }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-started --progress "docker keeper release" cd "./tests/ci" + python3 ./create_release.py --set-progress-started --progress "docker keeper release" export CHECK_NAME="Docker keeper image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - python3 ./tests/ci/create_release.py --set-progress-completed - - name: Set Release progress completed + python3 ./create_release.py --set-progress-completed + - name: Set current Release progress to Completed with OK shell: bash run: | - # If we here - set completed status, to post proper Slack OK or FAIL message in the next step python3 ./tests/ci/create_release.py --set-progress-started --progress "completed" python3 ./tests/ci/create_release.py --set-progress-completed - name: Post Slack Message diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 98a0345c6bd..86dcaf79854 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -43,7 +43,6 @@ class R2MountPoint: self.bucket_name = self._PROD_BUCKET_NAME self.aux_mount_options = "" - self.async_mount = False if self.app == MountPointApp.S3FS: self.cache_dir = "/home/ubuntu/s3fs_cache" # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs @@ -57,7 +56,6 @@ class R2MountPoint: self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" elif self.app == MountPointApp.RCLONE: # run rclone mount process asynchronously, otherwise subprocess.run(daemonized 
command) will not return - self.async_mount = True self.cache_dir = "/home/ubuntu/rclone_cache" self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose @@ -85,10 +83,12 @@ class R2MountPoint: Shell.run(_UNMOUNT_CMD) Shell.run(_MKDIR_CMD) Shell.run(_MKDIR_FOR_CACHE) - # didn't manage to use simple run() and not block or fail - Shell.run_as_daemon(self.mount_cmd) - if self.async_mount: - time.sleep(3) + if self.app == MountPointApp.S3FS: + Shell.run(self.mount_cmd, check=True) + else: + # didn't manage to use simple run() and without blocking or failure + Shell.run_as_daemon(self.mount_cmd) + time.sleep(3) Shell.run(_TEST_MOUNT_CMD, check=True) @classmethod @@ -107,6 +107,7 @@ class DebianArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/deb" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -154,9 +155,8 @@ class DebianArtifactory: print("Running test command:") print(f" {cmd}") Shell.run(cmd, check=True) - release_info = ReleaseInfo.from_file() - release_info.debian_command = debian_command - release_info.dump() + self.release_info.debian_command = debian_command + self.release_info.dump() def _copy_if_not_exists(src: Path, dst: Path) -> Path: @@ -177,6 +177,7 @@ class RpmArtifactory: _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -230,9 +231,8 @@ class RpmArtifactory: print("Running test command:") print(f" {cmd}") Shell.run(cmd, check=True) - release_info = ReleaseInfo.from_file() - release_info.rpm_command = rpm_command - release_info.dump() + self.release_info.rpm_command = rpm_command + self.release_info.dump() class TgzArtifactory: @@ 
-240,6 +240,7 @@ class TgzArtifactory: _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.release_info = release_info self.codename = release_info.codename self.version = release_info.version if dry_run: @@ -290,9 +291,8 @@ class TgzArtifactory: expected_checksum == actual_checksum ), f"[{actual_checksum} != {expected_checksum}]" Shell.run("rm /tmp/tmp.tgz*") - release_info = ReleaseInfo.from_file() - release_info.tgz_command = cmd - release_info.dump() + self.release_info.tgz_command = cmd + self.release_info.dump() def parse_args() -> argparse.Namespace: @@ -340,9 +340,7 @@ def parse_args() -> argparse.Namespace: if __name__ == "__main__": args = parse_args() - assert args.dry_run - release_info = ReleaseInfo.from_file() """ Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: ERROR : IO error: NotImplemented: versionId not implemented @@ -350,26 +348,38 @@ if __name__ == "__main__": """ mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) if args.export_debian: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_DEB) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_DEB + ) as release_info: mp.init() DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.export_rpm: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_RPM) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_RPM + ) as release_info: mp.init() RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.export_tgz: - with ReleaseContextManager(release_progress=ReleaseProgress.EXPORT_TGZ) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.EXPORT_TGZ + ) as release_info: mp.init() TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() mp.teardown() if args.test_debian: - with 
ReleaseContextManager(release_progress=ReleaseProgress.TEST_DEB) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_DEB + ) as release_info: DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_tgz: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_TGZ) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_TGZ + ) as release_info: TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() if args.test_rpm: - with ReleaseContextManager(release_progress=ReleaseProgress.TEST_RPM) as _: + with ReleaseContextManager( + release_progress=ReleaseProgress.TEST_RPM + ) as release_info: RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index efbf014cd52..9a1b12af310 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -182,10 +182,11 @@ class Shell: check=False, ) if result.returncode == 0: + print(f"stdout: {result.stdout.strip()}") res = result.stdout else: print( - f"ERROR: stdout {result.stdout.strip()}, stderr {result.stderr.strip()}" + f"ERROR: stdout: {result.stdout.strip()}, stderr: {result.stderr.strip()}" ) if check: assert result.returncode == 0 diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 4347cfebb54..a0b4083b673 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -43,6 +43,7 @@ class ReleaseProgress: TEST_TGZ = "test TGZ packages" TEST_RPM = "test RPM packages" TEST_DEB = "test DEB packages" + COMPLETED = "completed" class ReleaseProgressDescription: @@ -108,6 +109,12 @@ class ReleaseInfo: release_progress: str = "" progress_description: str = "" + def is_patch(self): + return self.release_branch != "master" + + def is_new_release_branch(self): + return self.release_branch == "master" + @staticmethod def from_file() -> "ReleaseInfo": with open(RELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -126,12 +133,12 @@ class 
ReleaseInfo: release_tag = None previous_release_tag = None previous_release_sha = None - codename = None + codename = "" assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch Shell.run( - f"git merge-base --is-ancestor origin/{commit_ref} origin/master", + f"git merge-base --is-ancestor {commit_ref} origin/master", check=True, ) with checkout(commit_ref): @@ -146,9 +153,6 @@ class ReleaseInfo: git.latest_tag == expected_prev_tag ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" release_tag = version.describe - codename = ( - VersionType.STABLE - ) # dummy value (artifactory won't be updated for new release) previous_release_tag = expected_prev_tag previous_release_sha = Shell.run_strict( f"git rev-parse {previous_release_tag}" @@ -205,7 +209,7 @@ class ReleaseInfo: and commit_sha and release_tag and version - and codename in ("lts", "stable") + and (codename in ("lts", "stable") or release_type == "new") ) self.release_branch = release_branch @@ -320,24 +324,27 @@ class ReleaseInfo: Shell.run( f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) - self.version_bump_pr = GHActions.get_pr_url_by_branch( - repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors - ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch_upd_version_contributors + ) def update_release_info(self, dry_run: bool) -> "ReleaseInfo": - branch = f"auto/{release_info.release_tag}" - if not dry_run: - url = GHActions.get_pr_url_by_branch(repo=GITHUB_REPOSITORY, branch=branch) - else: - url = "dry-run" - - print(f"ChangeLog PR url [{url}]") - self.changelog_pr = url - print(f"Release url [{url}]") - self.release_url = ( - f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - ) - self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.release_branch} clickhouse --version" + if 
self.release_branch != "master": + branch = f"auto/{release_info.release_tag}" + if not dry_run: + url = GHActions.get_pr_url_by_branch( + repo=GITHUB_REPOSITORY, branch=branch + ) + else: + url = "dry-run" + print(f"ChangeLog PR url [{url}]") + self.changelog_pr = url + print(f"Release url [{url}]") + self.release_url = f"https://github.com/{GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + if self.release_progress == ReleaseProgress.COMPLETED: + self.docker_command = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" self.dump() return self @@ -712,13 +719,22 @@ if __name__ == "__main__": if args.post_status: release_info = ReleaseInfo.from_file() release_info.update_release_info(dry_run=args.dry_run) - if release_info.debian_command: + if release_info.is_new_release_branch(): + title = "New release branch" + else: + title = "New release" + if ( + release_info.progress_description == ReleaseProgressDescription.OK + and release_info.release_progress == ReleaseProgress.COMPLETED + ): + title = "Completed: " + title CIBuddy(dry_run=args.dry_run).post_done( - f"New release issued", dataclasses.asdict(release_info) + title, dataclasses.asdict(release_info) ) else: + title = "Failed: " + title CIBuddy(dry_run=args.dry_run).post_critical( - f"Failed to issue new release", dataclasses.asdict(release_info) + title, dataclasses.asdict(release_info) ) if args.set_progress_started: From 2be21fe05c6b952735fc6895c0286b177864dde5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 05:53:12 +0200 Subject: [PATCH 47/57] Fix RocksDB bs --- tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 index 6121db6d6a2..4ab98201eed 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 @@ 
-1,4 +1,4 @@ --- Tags: use-rocksdb, long +-- Tags: use-rocksdb, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings SET join_algorithm = 'direct'; @@ -41,4 +41,3 @@ ON rdb.key == t1.k; {% for table_size in [10, 65555, 100000] -%} DROP TABLE IF EXISTS rdb_{{ table_size }}; {% endfor -%} - From 989476d5234bffd223988bf8aa88e2021e999574 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 06:41:52 +0200 Subject: [PATCH 48/57] Make test `01592_long_window_functions1` lighter --- .../0_stateless/01592_long_window_functions1.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index d2d32e24eaa..671245599cc 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -8,14 +8,14 @@ drop table if exists stack; set max_insert_threads = 4; create table stack(item_id Int64, brand_id Int64, rack_id Int64, dt DateTime, expiration_dt DateTime, quantity UInt64) -Engine = MergeTree -partition by toYYYYMM(dt) +Engine = MergeTree +partition by toYYYYMM(dt) order by (brand_id, toStartOfHour(dt)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into stack -select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, +insert into stack +select number%99991, number%11, number%1111, toDateTime('2020-01-01 00:00:00')+number/100, toDateTime('2020-02-01 00:00:00')+number/10, intDiv(number,100)+1 -from numbers_mt(10000000); +from numbers_mt(1000000); select '---- arrays ----'; @@ -32,8 +32,8 @@ select '---- window f ----'; select cityHash64( toString( groupArray (tuple(*) ) )) from ( select brand_id, rack_id, quantity from ( select brand_id, rack_id, quantity, row_number() over (partition by brand_id, rack_id order by quantity) rn - from stack ) as t0 - where rn <= 2 + 
from stack ) as t0 + where rn <= 2 order by brand_id, rack_id, quantity ) t; From 049056e55aa974ecad8786fb1ec738c96a191118 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 22 Jul 2024 05:48:40 +0000 Subject: [PATCH 49/57] Fix style --- src/Functions/dateDiff.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index a39cbae4e30..faab42817ba 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -26,8 +26,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; } From 1a3559fbc3c5257c4e0f5ec16eda3d09d8ebcca0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 22 Jul 2024 08:46:41 +0200 Subject: [PATCH 50/57] What if I will change this test? --- tests/integration/test_ssl_cert_authentication/test.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 756a1e1996c..3af88759e82 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -43,15 +43,10 @@ def started_cluster(): config = """ - none - + strict {certificateFile} {privateKeyFile} {caConfig} - - - AcceptCertificateHandler - """ From 9d2dac90ed30643bd8c4fb49280060432677905f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 22 Jul 2024 07:27:46 +0000 Subject: [PATCH 51/57] Functions [a-g]: Use input_rows_count where appropriate --- src/Functions/acosh.cpp | 11 ++++--- src/Functions/addMicroseconds.cpp | 1 + src/Functions/addMilliseconds.cpp | 1 + src/Functions/addNanoseconds.cpp | 1 + src/Functions/aes_encrypt_mysql.cpp | 1 - src/Functions/appendTrailingCharIfAbsent.cpp | 9 +++--- src/Functions/ascii.cpp | 2 -- src/Functions/asinh.cpp | 11 ++++--- 
src/Functions/atan2.cpp | 11 ++++--- src/Functions/atanh.cpp | 11 ++++--- src/Functions/base58Encode.cpp | 2 ++ src/Functions/base64Decode.cpp | 2 ++ src/Functions/base64Encode.cpp | 2 ++ src/Functions/base64URLDecode.cpp | 2 ++ src/Functions/base64URLEncode.cpp | 2 ++ src/Functions/byteSize.cpp | 8 ++--- src/Functions/byteSwap.cpp | 1 + src/Functions/caseWithExpression.cpp | 3 +- src/Functions/convertCharset.cpp | 12 +++---- src/Functions/cosh.cpp | 11 ++++--- .../countSubstringsCaseInsensitiveUTF8.cpp | 3 +- src/Functions/dateName.cpp | 16 +++++----- src/Functions/degrees.cpp | 22 +++++++------ src/Functions/filesystem.cpp | 2 +- src/Functions/formatDateTime.cpp | 32 +++++++++---------- src/Functions/formatQuery.cpp | 10 +++--- src/Functions/formatReadable.h | 31 +++++++++--------- src/Functions/geohashDecode.cpp | 16 ++++------ src/Functions/geohashEncode.cpp | 14 ++++---- src/Functions/transform.cpp | 3 +- 30 files changed, 131 insertions(+), 122 deletions(-) diff --git a/src/Functions/acosh.cpp b/src/Functions/acosh.cpp index 5b071da9c40..2bab84c77af 100644 --- a/src/Functions/acosh.cpp +++ b/src/Functions/acosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AcoshName - { - static constexpr auto name = "acosh"; - }; - using FunctionAcosh = FunctionMathUnary>; + +struct AcoshName +{ + static constexpr auto name = "acosh"; +}; +using FunctionAcosh = FunctionMathUnary>; } diff --git a/src/Functions/addMicroseconds.cpp b/src/Functions/addMicroseconds.cpp index 0dcd6b4452f..8c0ae06dcd0 100644 --- a/src/Functions/addMicroseconds.cpp +++ b/src/Functions/addMicroseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddMicroseconds) { factory.registerFunction(); diff --git a/src/Functions/addMilliseconds.cpp b/src/Functions/addMilliseconds.cpp index 0e2b696d367..83e1f96ec4b 100644 --- a/src/Functions/addMilliseconds.cpp +++ b/src/Functions/addMilliseconds.cpp @@ -6,6 +6,7 @@ 
namespace DB { using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddMilliseconds) { factory.registerFunction(); diff --git a/src/Functions/addNanoseconds.cpp b/src/Functions/addNanoseconds.cpp index 93eadc814d9..8f9a54752b9 100644 --- a/src/Functions/addNanoseconds.cpp +++ b/src/Functions/addNanoseconds.cpp @@ -6,6 +6,7 @@ namespace DB { using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; + REGISTER_FUNCTION(AddNanoseconds) { factory.registerFunction(); diff --git a/src/Functions/aes_encrypt_mysql.cpp b/src/Functions/aes_encrypt_mysql.cpp index fb120151c25..33733f92b27 100644 --- a/src/Functions/aes_encrypt_mysql.cpp +++ b/src/Functions/aes_encrypt_mysql.cpp @@ -7,7 +7,6 @@ namespace DB { - namespace { diff --git a/src/Functions/appendTrailingCharIfAbsent.cpp b/src/Functions/appendTrailingCharIfAbsent.cpp index a5554171aaa..0e57d5c55ce 100644 --- a/src/Functions/appendTrailingCharIfAbsent.cpp +++ b/src/Functions/appendTrailingCharIfAbsent.cpp @@ -57,7 +57,7 @@ private: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const auto & column = arguments[0].column; const auto & column_char = arguments[1].column; @@ -80,14 +80,13 @@ private: auto & dst_data = col_res->getChars(); auto & dst_offsets = col_res->getOffsets(); - const auto size = src_offsets.size(); - dst_data.resize(src_data.size() + size); - dst_offsets.resize(size); + dst_data.resize(src_data.size() + input_rows_count); + dst_offsets.resize(input_rows_count); ColumnString::Offset src_offset{}; ColumnString::Offset dst_offset{}; - for (const auto i : collections::range(0, size)) + for (size_t i = 
0; i < input_rows_count; ++i) { const auto src_length = src_offsets[i] - src_offset; memcpySmallAllowReadWriteOverflow15(&dst_data[dst_offset], &src_data[src_offset], src_length); diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index 7c8158b53d4..0d50e5d203b 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -45,9 +45,7 @@ struct AsciiImpl size_t size = data.size() / n; for (size_t i = 0; i < size; ++i) - { res[i] = doAscii(data, i * n, n); - } } [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) diff --git a/src/Functions/asinh.cpp b/src/Functions/asinh.cpp index 6af832ae07c..b5e3626148f 100644 --- a/src/Functions/asinh.cpp +++ b/src/Functions/asinh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AsinhName - { - static constexpr auto name = "asinh"; - }; - using FunctionAsinh = FunctionMathUnary>; + +struct AsinhName +{ + static constexpr auto name = "asinh"; +}; +using FunctionAsinh = FunctionMathUnary>; } diff --git a/src/Functions/atan2.cpp b/src/Functions/atan2.cpp index 42294e11458..218f4c5406f 100644 --- a/src/Functions/atan2.cpp +++ b/src/Functions/atan2.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct Atan2Name - { - static constexpr auto name = "atan2"; - }; - using FunctionAtan2 = FunctionMathBinaryFloat64>; + +struct Atan2Name +{ + static constexpr auto name = "atan2"; +}; +using FunctionAtan2 = FunctionMathBinaryFloat64>; } diff --git a/src/Functions/atanh.cpp b/src/Functions/atanh.cpp index fab25414725..a36f5bcbcf0 100644 --- a/src/Functions/atanh.cpp +++ b/src/Functions/atanh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct AtanhName - { - static constexpr auto name = "atanh"; - }; - using FunctionAtanh = FunctionMathUnary>; + +struct AtanhName +{ + static constexpr auto name = "atanh"; +}; +using FunctionAtanh = FunctionMathUnary>; } diff --git a/src/Functions/base58Encode.cpp b/src/Functions/base58Encode.cpp index cf790ebddab..3ae2fb12c5e 
100644 --- a/src/Functions/base58Encode.cpp +++ b/src/Functions/base58Encode.cpp @@ -3,8 +3,10 @@ namespace DB { + REGISTER_FUNCTION(Base58Encode) { factory.registerFunction>(); } + } diff --git a/src/Functions/base64Decode.cpp b/src/Functions/base64Decode.cpp index 4d06ac99d6f..349475af3f0 100644 --- a/src/Functions/base64Decode.cpp +++ b/src/Functions/base64Decode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Decode) { FunctionDocumentation::Description description = R"(Accepts a String and decodes it from base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Throws an exception in case of an error. Alias: FROM_BASE64.)"; @@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Decode) /// MySQL compatibility alias. factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64Encode.cpp b/src/Functions/base64Encode.cpp index 64142995552..fe0fa642599 100644 --- a/src/Functions/base64Encode.cpp +++ b/src/Functions/base64Encode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64Encode) { FunctionDocumentation::Description description = R"(Encodes a String as base64, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-4). Alias: TO_BASE64.)"; @@ -19,6 +20,7 @@ REGISTER_FUNCTION(Base64Encode) /// MySQL compatibility alias. 
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::Case::Insensitive); } + } #endif diff --git a/src/Functions/base64URLDecode.cpp b/src/Functions/base64URLDecode.cpp index f5766dc60bd..f256e111619 100644 --- a/src/Functions/base64URLDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLDecode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/base64URLEncode.cpp b/src/Functions/base64URLEncode.cpp index 73a465a30c5..215712f7586 100644 --- a/src/Functions/base64URLEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,6 +5,7 @@ namespace DB { + REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; @@ -16,6 +17,7 @@ REGISTER_FUNCTION(Base64URLEncode) factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } + } #endif diff --git a/src/Functions/byteSize.cpp b/src/Functions/byteSize.cpp index 93a3a86641a..d366a1b2e12 100644 --- a/src/Functions/byteSize.cpp +++ b/src/Functions/byteSize.cpp @@ -67,11 +67,11 @@ public: const IColumn * column = arguments[arg_num].column.get(); if (arg_num == 0) - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] = column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] = column->byteSizeAt(row); else - for (size_t row_num = 0; row_num < input_rows_count; ++row_num) - vec_res[row_num] += 
column->byteSizeAt(row_num); + for (size_t row = 0; row < input_rows_count; ++row) + vec_res[row] += column->byteSizeAt(row); } return result_col; diff --git a/src/Functions/byteSwap.cpp b/src/Functions/byteSwap.cpp index 6c824b851b0..2094ec4fa1a 100644 --- a/src/Functions/byteSwap.cpp +++ b/src/Functions/byteSwap.cpp @@ -10,6 +10,7 @@ extern const int NOT_IMPLEMENTED; namespace { + template requires std::is_integral_v T byteSwap(T x) diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 71fccc8436e..f0a620489ef 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -98,8 +98,7 @@ public: /// Execute transform. ColumnsWithTypeAndName transform_args{args.front(), src_array_col, dst_array_col, args.back()}; - return FunctionFactory::instance().get("transform", context)->build(transform_args) - ->execute(transform_args, result_type, input_rows_count); + return FunctionFactory::instance().get("transform", context)->build(transform_args)->execute(transform_args, result_type, input_rows_count); } private: diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index b3b7394acb9..d998e88e7c2 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -88,7 +88,8 @@ private: static void convert(const String & from_charset, const String & to_charset, const ColumnString::Chars & from_chars, const ColumnString::Offsets & from_offsets, - ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets) + ColumnString::Chars & to_chars, ColumnString::Offsets & to_offsets, + size_t input_rows_count) { auto converter_from = getConverter(from_charset); auto converter_to = getConverter(to_charset); @@ -96,12 +97,11 @@ private: ColumnString::Offset current_from_offset = 0; ColumnString::Offset current_to_offset = 0; - size_t size = from_offsets.size(); - to_offsets.resize(size); + to_offsets.resize(input_rows_count); PODArray uchars; - for (size_t 
i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { size_t from_string_size = from_offsets[i] - current_from_offset - 1; @@ -184,7 +184,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const ColumnWithTypeAndName & arg_from = arguments[0]; const ColumnWithTypeAndName & arg_charset_from = arguments[1]; @@ -204,7 +204,7 @@ public: if (const ColumnString * col_from = checkAndGetColumn(arg_from.column.get())) { auto col_to = ColumnString::create(); - convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets()); + convert(charset_from, charset_to, col_from->getChars(), col_from->getOffsets(), col_to->getChars(), col_to->getOffsets(), input_rows_count); return col_to; } else diff --git a/src/Functions/cosh.cpp b/src/Functions/cosh.cpp index 54b52051aab..f4302292303 100644 --- a/src/Functions/cosh.cpp +++ b/src/Functions/cosh.cpp @@ -5,11 +5,12 @@ namespace DB { namespace { - struct CoshName - { - static constexpr auto name = "cosh"; - }; - using FunctionCosh = FunctionMathUnary>; + +struct CoshName +{ + static constexpr auto name = "cosh"; +}; +using FunctionCosh = FunctionMathUnary>; } diff --git a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp index 3f71bca63d2..99ae4f1927e 100644 --- a/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp +++ b/src/Functions/countSubstringsCaseInsensitiveUTF8.cpp @@ -13,8 +13,7 @@ struct NameCountSubstringsCaseInsensitiveUTF8 static constexpr auto name = "countSubstringsCaseInsensitiveUTF8"; }; -using 
FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch< - CountSubstringsImpl>; +using FunctionCountSubstringsCaseInsensitiveUTF8 = FunctionsStringSearch>; } diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 8165ea1b8d3..846cb87f1ee 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -109,14 +109,14 @@ public: ColumnPtr executeImpl( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, - [[maybe_unused]] size_t input_rows_count) const override + size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", @@ -127,7 +127,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto * times = checkAndGetColumn(arguments[1].column.get()); if (!times) @@ -144,7 +144,7 @@ public: String date_part = date_part_column->getValue(); const DateLUTImpl * time_zone_tmp; - if (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(arguments, 2, 1); else time_zone_tmp = &DateLUT::instance(); @@ -175,7 +175,7 @@ public: using TimeType = DateTypeToTimeType; callOnDatePartWriter(date_part, [&](const auto & writer) { - for (size_t i = 0; i < times_data.size(); ++i) + for (size_t 
i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v) { diff --git a/src/Functions/degrees.cpp b/src/Functions/degrees.cpp index 8646eb54d9a..94b5ce3682c 100644 --- a/src/Functions/degrees.cpp +++ b/src/Functions/degrees.cpp @@ -7,18 +7,20 @@ namespace DB { namespace { - struct DegreesName - { - static constexpr auto name = "degrees"; - }; - Float64 degrees(Float64 r) - { - Float64 degrees = r * (180 / M_PI); - return degrees; - } +struct DegreesName +{ + static constexpr auto name = "degrees"; +}; + +Float64 degrees(Float64 r) +{ + Float64 degrees = r * (180 / M_PI); + return degrees; +} + +using FunctionDegrees = FunctionMathUnary>; - using FunctionDegrees = FunctionMathUnary>; } REGISTER_FUNCTION(Degrees) diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 9fbf9b0cbe7..9b168f3f088 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -91,7 +91,7 @@ public: auto col_res = ColumnVector::create(col_str->size()); auto & data = col_res->getData(); - for (size_t i = 0; i < col_str->size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { auto disk_name = col_str->getDataAt(i).toString(); if (auto it = disk_map.find(disk_name); it != disk_map.end()) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index f89afd67e78..f33b7849a43 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -848,7 +848,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { ColumnPtr res; if constexpr (support_integer == SupportInteger::Yes) @@ -862,17 +862,17 @@ public: if (!castType(arguments[0].type.get(), [&](const auto & type) { using FromDataType = std::decay_t; - if (!(res = 
executeType(arguments, result_type))) + if (!(res = executeType(arguments, result_type, input_rows_count))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64.", arguments[0].column->getName(), getName()); return true; })) { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Integer or DateTime.", arguments[0].column->getName(), getName()); @@ -881,10 +881,10 @@ public: } else { - if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)))) + if (!((res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)) + || (res = executeType(arguments, result_type, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of function {}, must be Date or DateTime.", arguments[0].column->getName(), getName()); @@ -894,7 +894,7 @@ public: } template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const { auto non_const_datetime = arguments[0].column->convertToFullColumnIfConst(); auto * times = checkAndGetColumn(non_const_datetime.get()); @@ -955,13 
+955,11 @@ public: else time_zone = &DateLUT::instance(); - const auto & vec = times->getData(); - auto col_res = ColumnString::create(); auto & res_data = col_res->getChars(); auto & res_offsets = col_res->getOffsets(); - res_data.resize(vec.size() * (out_template_size + 1)); - res_offsets.resize(vec.size()); + res_data.resize(input_rows_count * (out_template_size + 1)); + res_offsets.resize(input_rows_count); if constexpr (format_syntax == FormatSyntax::MySQL) { @@ -990,9 +988,11 @@ public: } } + const auto & vec = times->getData(); + auto * begin = reinterpret_cast(res_data.data()); auto * pos = begin; - for (size_t i = 0; i < vec.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { if (!const_time_zone_column && arguments.size() > 2) { diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index d10b3f9a5b7..4e3f302ce36 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -75,7 +75,7 @@ public: if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) { auto col_res = ColumnString::create(); - formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); + formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map, input_rows_count); if (error_handling == ErrorHandling::Null) return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); @@ -92,16 +92,16 @@ private: const ColumnString::Offsets & offsets, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets, - ColumnUInt8::MutablePtr & res_null_map) const + ColumnUInt8::MutablePtr & res_null_map, + size_t input_rows_count) const { - const size_t size = offsets.size(); - res_offsets.resize(size); + res_offsets.resize(input_rows_count); res_data.resize(data.size()); size_t prev_offset = 0; size_t res_data_size = 0; - for (size_t i = 0; i < size; ++i) + for (size_t 
i = 0; i < input_rows_count; ++i) { const char * begin = reinterpret_cast(&data[prev_offset]); const char * end = begin + offsets[i] - prev_offset - 1; diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 487ec9d79d0..9161ab43e28 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -55,19 +55,19 @@ public: bool useDefaultImplementationForConstants() const override { return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { ColumnPtr res; - if (!((res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)) - || (res = executeType(arguments)))) + if (!((res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)) + || (res = executeType(arguments, input_rows_count)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); @@ -76,7 +76,7 @@ public: private: template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { if (const 
ColumnVector * col_from = checkAndGetColumn>(arguments[0].column.get())) { @@ -85,13 +85,12 @@ private: const typename ColumnVector::Container & vec_from = col_from->getData(); ColumnString::Chars & data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - data_to.resize(size * 2); - offsets_to.resize(size); + data_to.resize(input_rows_count * 2); + offsets_to.resize(input_rows_count); WriteBufferFromVector buf_to(data_to); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Impl::format(static_cast(vec_from[i]), buf_to); writeChar(0, buf_to); diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index 96ad7dacfc4..cace6c09fec 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -51,21 +51,19 @@ public: } template - bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column) const + bool tryExecute(const IColumn * encoded_column, ColumnPtr & result_column, size_t input_rows_count) const { const auto * encoded = checkAndGetColumn(encoded_column); if (!encoded) return false; - const size_t count = encoded->size(); - - auto latitude = ColumnFloat64::create(count); - auto longitude = ColumnFloat64::create(count); + auto latitude = ColumnFloat64::create(input_rows_count); + auto longitude = ColumnFloat64::create(input_rows_count); ColumnFloat64::Container & lon_data = longitude->getData(); ColumnFloat64::Container & lat_data = latitude->getData(); - for (size_t i = 0; i < count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { std::string_view encoded_string = encoded->getDataAt(i).toView(); geohashDecode(encoded_string.data(), encoded_string.size(), &lon_data[i], &lat_data[i]); @@ -79,13 +77,13 @@ public: return true; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const 
ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * encoded = arguments[0].column.get(); ColumnPtr res_column; - if (tryExecute(encoded, res_column) || - tryExecute(encoded, res_column)) + if (tryExecute(encoded, res_column, input_rows_count) || + tryExecute(encoded, res_column, input_rows_count)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}", diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 034c8188b63..c49acddd81f 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -53,7 +53,7 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const IColumn * longitude = arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); @@ -65,26 +65,24 @@ public: precision = arguments[2].column; ColumnPtr res_column; - vector(longitude, latitude, precision.get(), res_column); + vector(longitude, latitude, precision.get(), res_column, input_rows_count); return res_column; } private: - void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result, size_t input_rows_count) const { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + out_offsets.resize(input_rows_count); + out_vec.resize(input_rows_count * 
(GEOHASH_MAX_TEXT_LENGTH + 1)); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const Float64 longitude_value = lon_column->getFloat64(i); const Float64 latitude_value = lat_column->getFloat64(i); diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 0dbc9946710..68500779f93 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -138,8 +138,7 @@ namespace } } - ColumnPtr executeImpl( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { std::call_once(once, [&] { initialize(arguments, result_type); }); From 0f327869132940b3bae932730cb2ce2f9c394163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 22 Jul 2024 10:46:42 +0200 Subject: [PATCH 52/57] Better random and queries --- tests/queries/0_stateless/01194_http_query_id.sh | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01194_http_query_id.sh b/tests/queries/0_stateless/01194_http_query_id.sh index fac17cca3c6..42321112185 100755 --- a/tests/queries/0_stateless/01194_http_query_id.sh +++ b/tests/queries/0_stateless/01194_http_query_id.sh @@ -4,14 +4,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -rnd=$RANDOM -url="${CLICKHOUSE_URL}&session_id=test_01194_$RANDOM" +rnd="$CLICKHOUSE_DATABASE" +url="${CLICKHOUSE_URL}&session_id=test_01194_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,1" > /dev/null ${CLICKHOUSE_CURL} -sS "$url&query=SELECT+'test_01194',$rnd,2" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,3" > /dev/null ${CLICKHOUSE_CURL} -sS "$url" --data "SELECT 'test_01194',$rnd,4" > /dev/null -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data "SELECT count(DISTINCT query_id) FROM system.query_log WHERE current_database = currentDatabase() AND event_date >= yesterday() AND query LIKE 'SELECT ''test_01194'',$rnd%'" +$CLICKHOUSE_CLIENT -q " + SELECT + count(DISTINCT query_id) + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE 'SELECT ''test_01194'',$rnd%' + AND query_id != queryID()" From 255dcec501e7506291cee44f5c9eb30b5eec7e99 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 22 Jul 2024 11:10:34 +0200 Subject: [PATCH 53/57] Fix 02241_join_rocksdb_bs --- .../02241_join_rocksdb_bs.reference | 56 ------------------- .../0_stateless/02241_join_rocksdb_bs.sql.j2 | 20 +++---- 2 files changed, 7 insertions(+), 69 deletions(-) diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference index 8416a2991c1..4dff9ef38ef 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.reference +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.reference @@ -10,59 +10,3 @@ 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 
b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 index 4ab98201eed..e5703f99d62 100644 --- a/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 +++ b/tests/queries/0_stateless/02241_join_rocksdb_bs.sql.j2 @@ -1,4 +1,4 @@ --- Tags: use-rocksdb, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings +-- Tags: use-rocksdb, long, no-s3-storage SET join_algorithm = 'direct'; @@ -13,27 +13,21 @@ INSERT INTO rdb_{{ table_size }} SELECT (sipHash64(number) % {{ table_size }}) as key, ('val' || toString(key)) AS value FROM numbers_mt({{ table_size }}); -{% for block_size in [10, 11, 128, 129, 65505, 65506, 70000] -%} - -{% if block_size * 5000 > table_size -%} - -SET max_block_size = {{ block_size }}; - {% for right_size in [table_size // 2, table_size + table_size // 4 + 1] -%} SELECT count() == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }}) FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1 INNER JOIN rdb_{{ table_size }} as rdb -ON rdb.key == t1.k; +ON rdb.key == t1.k +{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%} +; SELECT count() == {{ right_size }} and countIf(value != '') == (SELECT count() FROM rdb_{{ table_size }} WHERE key < {{ right_size }}) FROM (SELECT number as k FROM numbers_mt({{ right_size }})) as t1 LEFT JOIN rdb_{{ table_size }} as rdb -ON rdb.key == t1.k; - -{% endfor -%} - -{% endif -%} +ON rdb.key == t1.k +{% if table_size < 100 %}SETTINGS max_block_size = 1{% endif -%} +; {% endfor -%} {% endfor -%} From e708219f6aecfe9934827f453665e03142ad5112 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 22 Jul 2024 13:01:27 +0200 Subject: [PATCH 54/57] CI: Print instance info in runner's init script --- tests/ci/worker/init_runner.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index d6cdb6d9c57..1bfeeb38c15 100644 --- a/tests/ci/worker/init_runner.sh +++ 
b/tests/ci/worker/init_runner.sh @@ -50,7 +50,7 @@ set -uo pipefail # set accordingly to a runner role # #################################### -echo "Running init v1" +echo "Running init v1.1" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner @@ -66,6 +66,11 @@ bash /usr/local/share/scripts/init-network.sh RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE" export LABELS +echo "Instance Labels: $LABELS" + +LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) +export LIFE_CYCLE +echo "Instance lifecycle: $LIFE_CYCLE" # Refresh CloudWatch agent config aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json @@ -124,10 +129,6 @@ terminate_decrease_and_exit() { declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh check_spot_instance_is_old() { - # This function should be executed ONLY BETWEEN runnings. - # It's unsafe to execute while the runner is working! 
- local LIFE_CYCLE - LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle) if [ "$LIFE_CYCLE" == "spot" ]; then local UPTIME UPTIME=$(< /proc/uptime) From cd700c59b1d0b894a7a8461a33d994732b7864f0 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 22 Jul 2024 13:17:25 +0200 Subject: [PATCH 55/57] minor fix --- tests/ci/auto_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 39ab3156c80..f2386fe207f 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -191,7 +191,7 @@ def main(): title=f"Auto Release Status for {release_info.release_branch}", body=release_info.to_dict(), ) - if args.post_auto_release_complete: + elif args.post_auto_release_complete: assert args.wf_status, "--wf-status Required with --post-auto-release-complete" if args.wf_status != SUCCESS: CIBuddy(dry_run=False).post_job_error( From 69ad8feb9078421aca99709ffb839e2f3b923427 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 22 Jul 2024 13:38:10 +0200 Subject: [PATCH 56/57] add instance type --- tests/ci/worker/init_runner.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 1bfeeb38c15..5177e112edd 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -72,6 +72,9 @@ LIFE_CYCLE=$(curl -s --fail http://169.254.169.254/latest/meta-data/instance-lif export LIFE_CYCLE echo "Instance lifecycle: $LIFE_CYCLE" +INSTANCE_TYPE=$(ec2metadata --instance-type) +echo "Instance type: $INSTANCE_TYPE" + # Refresh CloudWatch agent config aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json systemctl restart amazon-cloudwatch-agent.service From fa0f760fd9afd652dee50b89fd8d176d4e031174 Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 22 Jul 2024 15:46:27 
+0200 Subject: [PATCH 57/57] CI: CI Buddy to notify about fatal workflow failures --- .github/workflows/backport_branches.yml | 14 ++++++++- .github/workflows/master.yml | 38 +++++++------------------ .github/workflows/merge_queue.yml | 13 ++++++++- .github/workflows/nightly.yml | 17 +++++++++++ .github/workflows/pull_request.yml | 13 ++++++++- .github/workflows/release_branches.yml | 14 ++++++++- .yamllint | 6 ++++ tests/ci/ci_buddy.py | 37 +++++++++++++++++++++--- tests/ci/ci_utils.py | 27 ++++++++++++++++++ 9 files changed, 143 insertions(+), 36 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 9645d0e46de..50f4f503f5d 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -241,8 +241,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug @@ -257,6 +258,7 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check @@ -264,3 +266,13 @@ jobs: # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 merge_pr.py + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 09acef5eb8b..b28d87ee31f 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -121,34 +121,6 @@ jobs: runner_type: style-checker-aarch64 data: ${{ 
needs.RunConfig.outputs.data }} - MarkReleaseReady: - if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, Builds_1, Builds_2] - runs-on: [self-hosted, style-checker-aarch64] - steps: - - name: Debug - run: | - echo need with different filters - cat << 'EOF' - ${{ toJSON(needs) }} - ${{ toJSON(needs.*.result) }} - no failures ${{ !contains(needs.*.result, 'failure') }} - no skips ${{ !contains(needs.*.result, 'skipped') }} - no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - EOF - - name: Not ready - # fail the job to be able to restart it - if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} - run: exit 1 - - name: Check out repository code - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - uses: ClickHouse/checkout@v1 - - name: Mark Commit Release Ready - if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 mark_release_ready.py - FinishCheck: if: ${{ !cancelled() }} needs: [RunConfig, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] @@ -160,3 +132,13 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 31a65ac3d15..db89825a99a 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -93,7 +93,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} CheckReadyForMerge: - if: ${{ !cancelled() && 
needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -101,6 +101,17 @@ jobs: - name: Check out repository code uses: ClickHouse/checkout@v1 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bffe5b4c1bf..fd5b5eefcc4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -44,3 +44,20 @@ jobs: with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true + CheckWorkflow: + if: ${{ !cancelled() }} + needs: [RunConfig, BuildDockers] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 5124e4dba2c..9930cf6dde4 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -151,7 +151,7 @@ jobs: data: ${{ needs.RunConfig.outputs.data }} 
CheckReadyForMerge: - if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + if: ${{ !cancelled() }} # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1] runs-on: [self-hosted, style-checker-aarch64] @@ -161,9 +161,20 @@ jobs: with: filter: tree:0 - name: Check and set merge status + if: ${{ needs.StyleCheck.result == 'success' }} run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status ################################# Stage Final ################################# # diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 6a18999d74e..50565112825 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -441,8 +441,9 @@ jobs: runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: + - RunConfig - DockerServerImage - DockerKeeperImage - Builds_Report @@ -478,9 +479,20 @@ jobs: with: clear-repository: true - name: Finish label + if: ${{ !failure() }} run: | cd "$GITHUB_WORKSPACE/tests/ci" # update mergeable check python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + - name: Check Workflow results + run: | + export 
WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" + cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + ${{ toJson(needs) }} + EOF + echo "::group::Workflow results" + python3 -m json.tool "$WORKFLOW_RESULT_FILE" + echo "::endgroup::" + python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.yamllint b/.yamllint index f144e2d47b1..7fb741ec9f4 100644 --- a/.yamllint +++ b/.yamllint @@ -14,3 +14,9 @@ rules: comments: min-spaces-from-content: 1 document-start: disable + colons: disable + indentation: disable + line-length: disable + trailing-spaces: disable + truthy: disable + new-line-at-end-of-file: disable diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index 3eba5532e66..688c7d59988 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -1,3 +1,4 @@ +import argparse import json import os from typing import Union, Dict @@ -7,7 +8,7 @@ import requests from botocore.exceptions import ClientError from pr_info import PRInfo -from ci_utils import Shell +from ci_utils import Shell, GHActions class CIBuddy: @@ -29,6 +30,11 @@ class CIBuddy: self.commit_url = pr_info.commit_html_url self.sha = pr_info.sha[:10] + def check_workflow(self): + res = GHActions.get_workflow_job_result(GHActions.ActionsNames.RunConfig) + if res != GHActions.ActionStatuses.SUCCESS: + self.post_job_error("Workflow Configuration Failed", critical=True) + @staticmethod def _get_webhooks(): name = "ci_buddy_web_hooks" @@ -139,7 +145,30 @@ class CIBuddy: self.post(message) +def parse_args(): + parser = argparse.ArgumentParser("CI Buddy bot notifies about CI events") + parser.add_argument( + "--check-wf-status", + action="store_true", + help="Checks workflow status", + ) + parser.add_argument( + "--test", + action="store_true", + help="for test and debug", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="dry run mode", + ) + return parser.parse_args(), parser + + if __name__ == "__main__": - # test - buddy = CIBuddy(dry_run=True) - buddy.post_job_error("TEst") + 
args, parser = parse_args() + + if args.test: + CIBuddy(dry_run=True).post_job_error("TEst") + elif args.check_wf_status: + CIBuddy(dry_run=args.dry_run).check_workflow() diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 9a1b12af310..1963e3f39d0 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,3 +1,4 @@ +import json import os import re import subprocess @@ -11,6 +12,9 @@ import requests class Envs: GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") + WORKFLOW_RESULT_FILE = os.getenv( + "WORKFLOW_RESULT_FILE", "/tmp/workflow_results.json" + ) LABEL_CATEGORIES = { @@ -79,6 +83,29 @@ def normalize_string(string: str) -> str: class GHActions: + class ActionsNames: + RunConfig = "RunConfig" + + class ActionStatuses: + ERROR = "error" + FAILURE = "failure" + PENDING = "pending" + SUCCESS = "success" + + @staticmethod + def get_workflow_job_result(wf_job_name: str) -> Optional[str]: + if not Path(Envs.WORKFLOW_RESULT_FILE).exists(): + print( + f"ERROR: Failed to get workflow results from file [{Envs.WORKFLOW_RESULT_FILE}]" + ) + return None + with open(Envs.WORKFLOW_RESULT_FILE, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + if wf_job_name in res: + return res[wf_job_name]["result"] # type: ignore + else: + return None + @staticmethod def print_in_group(group_name: str, lines: Union[Any, List[Any]]) -> None: lines = list(lines)