From 3f9e9a7025bec607bb1e646556c415e3336c796d Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Fri, 16 Apr 2021 23:18:39 +0300 Subject: [PATCH 01/32] Add INTERSECT and EXCEPT --- src/Common/ErrorCodes.cpp | 6 +- src/Interpreters/InterpreterFactory.cpp | 18 +- .../InterpreterIntersectOrExcept.cpp | 116 +++++++++++ .../InterpreterIntersectOrExcept.h | 35 ++++ src/Parsers/ASTIntersectOrExcept.cpp | 28 +++ src/Parsers/ASTIntersectOrExcept.h | 18 ++ src/Parsers/ParserIntersectOrExcept.cpp | 50 +++++ src/Parsers/ParserIntersectOrExcept.h | 14 ++ src/Parsers/ParserQueryWithOutput.cpp | 36 ++-- .../QueryPlan/IntersectOrExceptStep.cpp | 38 ++++ .../QueryPlan/IntersectOrExceptStep.h | 26 +++ .../Transforms/IntersectOrExceptTransform.cpp | 192 ++++++++++++++++++ .../Transforms/IntersectOrExceptTransform.h | 53 +++++ 13 files changed, 606 insertions(+), 24 deletions(-) create mode 100644 src/Interpreters/InterpreterIntersectOrExcept.cpp create mode 100644 src/Interpreters/InterpreterIntersectOrExcept.h create mode 100644 src/Parsers/ASTIntersectOrExcept.cpp create mode 100644 src/Parsers/ASTIntersectOrExcept.h create mode 100644 src/Parsers/ParserIntersectOrExcept.cpp create mode 100644 src/Parsers/ParserIntersectOrExcept.h create mode 100644 src/Processors/QueryPlan/IntersectOrExceptStep.cpp create mode 100644 src/Processors/QueryPlan/IntersectOrExceptStep.h create mode 100644 src/Processors/Transforms/IntersectOrExceptTransform.cpp create mode 100644 src/Processors/Transforms/IntersectOrExceptTransform.h diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index a2cd65137c0..0d1fd5bd7d8 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -533,7 +533,11 @@ M(564, INTERSERVER_SCHEME_DOESNT_MATCH) \ M(565, TOO_MANY_PARTITIONS) \ M(566, CANNOT_RMDIR) \ - \ + M(567, DUPLICATED_PART_UUIDS) \ + M(568, RAFT_ERROR) \ + M(569, MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD) \ + M(570, DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD) \ + M(571, 
INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH) \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ M(1001, STD_EXCEPTION) \ diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 15e4c52f040..e0f6479cc0e 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -1,14 +1,17 @@ #include #include #include -#include -#include #include +#include #include #include +#include #include #include +#include +#include #include +#include #include #include #include @@ -24,11 +27,9 @@ #include #include #include -#include -#include #include -#include #include +#include #include #include @@ -44,9 +45,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -65,7 +68,6 @@ #include #include #include -#include #include #include @@ -109,6 +111,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & ProfileEvents::increment(ProfileEvents::SelectQuery); return std::make_unique(query, context, options); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { ProfileEvents::increment(ProfileEvents::InsertQuery); diff --git a/src/Interpreters/InterpreterIntersectOrExcept.cpp b/src/Interpreters/InterpreterIntersectOrExcept.cpp new file mode 100644 index 00000000000..c85bd29e16f --- /dev/null +++ b/src/Interpreters/InterpreterIntersectOrExcept.cpp @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH; +} + +InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ptr_, ContextPtr context_) + : query_ptr(query_ptr_), context(Context::createCopy(context_)) +{ + ASTIntersectOrExcept * ast = query_ptr->as(); + size_t num_children = ast->children.size(); + nested_interpreters.resize(num_children); + 
for (size_t i = 0; i < num_children; ++i) + { + nested_interpreters[i] = buildCurrentChildInterpreter(ast->children[i]); + } + + Blocks headers(num_children); + for (size_t query_num = 0; query_num < num_children; ++query_num) + headers[query_num] = nested_interpreters[query_num]->getSampleBlock(); + + result_header = getCommonHeader(headers); +} + + +Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) +{ + size_t num_selects = headers.size(); + Block common_header = headers.front(); + size_t num_columns = common_header.columns(); + + for (size_t query_num = 1; query_num < num_selects; ++query_num) + { + if (headers[query_num].columns() != num_columns) + throw Exception( + "Different number of columns in " + + toString(query_ptr->as()->is_except ? "EXCEPT" : "INTERSECT") + + " elements:\n" + common_header.dumpNames() + "\nand\n" + + headers[query_num].dumpNames() + "\n", + ErrorCodes::INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH); + } + + std::vector columns(num_selects); + + for (size_t column_num = 0; column_num < num_columns; ++column_num) + { + for (size_t i = 0; i < num_selects; ++i) + columns[i] = &headers[i].getByPosition(column_num); + + ColumnWithTypeAndName & result_elem = common_header.getByPosition(column_num); + result_elem = getLeastSuperColumn(columns); + } + + return common_header; +} + + +std::unique_ptr +InterpreterIntersectOrExcept::buildCurrentChildInterpreter(const ASTPtr & ast_ptr_) +{ + if (ast_ptr_->as()) + return std::make_unique(ast_ptr_, context, SelectQueryOptions()); + else + return std::make_unique(ast_ptr_, context, SelectQueryOptions()); +} + +void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) +{ + size_t num_plans = nested_interpreters.size(); + + std::vector> plans(num_plans); + DataStreams data_streams(num_plans); + + for (size_t i = 0; i < num_plans; ++i) + { + plans[i] = std::make_unique(); + nested_interpreters[i]->buildQueryPlan(*plans[i]); + data_streams[i] = 
plans[i]->getCurrentDataStream(); + } + + auto max_threads = context->getSettingsRef().max_threads; + auto step = std::make_unique( + query_ptr->as()->is_except, std::move(data_streams), result_header, max_threads); + query_plan.unitePlans(std::move(step), std::move(plans)); +} + +BlockIO InterpreterIntersectOrExcept::execute() +{ + BlockIO res; + + QueryPlan query_plan; + buildQueryPlan(query_plan); + + auto pipeline = query_plan.buildQueryPipeline( + QueryPlanOptimizationSettings::fromContext(context), + BuildQueryPipelineSettings::fromContext(context)); + + res.pipeline = std::move(*pipeline); + res.pipeline.addInterpreterContext(context); + + return res; +} +} diff --git a/src/Interpreters/InterpreterIntersectOrExcept.h b/src/Interpreters/InterpreterIntersectOrExcept.h new file mode 100644 index 00000000000..0069dc02f1d --- /dev/null +++ b/src/Interpreters/InterpreterIntersectOrExcept.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class Context; +class InterpreterSelectQuery; +class QueryPlan; + +class InterpreterIntersectOrExcept : public IInterpreter +{ +public: + InterpreterIntersectOrExcept(const ASTPtr & query_ptr_, ContextPtr context_); + + /// Builds QueryPlan for current query. 
+ virtual void buildQueryPlan(QueryPlan & query_plan); + + BlockIO execute() override; + +private: + ASTPtr query_ptr; + ContextPtr context; + Block result_header; + std::vector> nested_interpreters; + Block getCommonHeader(const Blocks & headers); + + std::unique_ptr + buildCurrentChildInterpreter(const ASTPtr & ast_ptr_); +}; + +} diff --git a/src/Parsers/ASTIntersectOrExcept.cpp b/src/Parsers/ASTIntersectOrExcept.cpp new file mode 100644 index 00000000000..073d63963a9 --- /dev/null +++ b/src/Parsers/ASTIntersectOrExcept.cpp @@ -0,0 +1,28 @@ +#include +#include + +namespace DB +{ + +ASTPtr ASTIntersectOrExcept::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + res->children.push_back(children[0]->clone()); + res->children.push_back(children[1]->clone()); + res->is_except = is_except; + cloneOutputOptions(*res); + return res; +} + +void ASTIntersectOrExcept::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + children[0]->formatImpl(settings, state, frame); + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") + << (is_except ? "EXCEPT" : "INTERSECT ") + << (settings.hilite ? hilite_none : "") << settings.nl_or_ws; + children[1]->formatImpl(settings, state, frame); +} + +} diff --git a/src/Parsers/ASTIntersectOrExcept.h b/src/Parsers/ASTIntersectOrExcept.h new file mode 100644 index 00000000000..a02cb9f7d77 --- /dev/null +++ b/src/Parsers/ASTIntersectOrExcept.h @@ -0,0 +1,18 @@ +#pragma once + +#include + + +namespace DB +{ + +class ASTIntersectOrExcept : public ASTQueryWithOutput +{ +public: + String getID(char) const override { return is_except ? 
"Except" : "Intersect"; } + ASTPtr clone() const override; + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + bool is_except; +}; + +} diff --git a/src/Parsers/ParserIntersectOrExcept.cpp b/src/Parsers/ParserIntersectOrExcept.cpp new file mode 100644 index 00000000000..a82b8c2b06b --- /dev/null +++ b/src/Parsers/ParserIntersectOrExcept.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword intersect_keyword("INTERSECT"); + ParserKeyword except_keyword("EXCEPT"); + ASTPtr left_node; + ASTPtr right_node; + + auto ast = std::make_shared(); + ast->is_except = false; + + if (!ParserSelectQuery().parse(pos, left_node, expected) && !ParserSubquery().parse(pos, left_node, expected)) + return false; + + if (!intersect_keyword.ignore(pos)) + { + if (!except_keyword.ignore(pos)) + { + return false; + } + else + { + ast->is_except = true; + } + } + + if (!ParserSelectQuery().parse(pos, right_node, expected) && !ParserSubquery().parse(pos, right_node, expected)) + return false; + + if (const auto * ast_subquery = left_node->as()) + left_node = ast_subquery->children.at(0); + if (const auto * ast_subquery = right_node->as()) + right_node = ast_subquery->children.at(0); + + ast->children.push_back(left_node); + ast->children.push_back(right_node); + + node = ast; + return true; +} + +} diff --git a/src/Parsers/ParserIntersectOrExcept.h b/src/Parsers/ParserIntersectOrExcept.h new file mode 100644 index 00000000000..61cc74cf0a9 --- /dev/null +++ b/src/Parsers/ParserIntersectOrExcept.h @@ -0,0 +1,14 @@ +#pragma once +#include + + +namespace DB +{ +class ParserIntersectOrExcept : public IParserBase +{ +protected: + const char * getName() const override { return "INTERSECT or EXCEPT"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + 
+} diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index d5aa1e47533..35355b29ebf 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -1,36 +1,37 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include +#include +#include +#include +#include #include - namespace DB { bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserShowTablesQuery show_tables_p; + ParserIntersectOrExcept intersect_p; ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; ParserDescribeTableQuery describe_table_p; @@ -54,6 +55,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool parsed = explain_p.parse(pos, query, expected) + || intersect_p.parse(pos, query, expected) || select_p.parse(pos, query, expected) || show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p` || show_tables_p.parse(pos, query, expected) diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp new file mode 100644 index 00000000000..d0a820339d7 --- /dev/null +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +IntersectOrExceptStep::IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, Block result_header, size_t max_threads_) + : is_except(is_except_), header(std::move(result_header)), max_threads(max_threads_) +{ + input_streams = std::move(input_streams_); + output_stream = DataStream{.header = header}; +} + +QueryPipelinePtr 
IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & ) +{ + auto pipeline = std::make_unique(); + QueryPipelineProcessorsCollector collector(*pipeline, this); + + pipelines[0]->addTransform(std::make_shared(header, pipelines[0]->getNumStreams(), 1)); + pipelines[1]->addTransform(std::make_shared(header, pipelines[1]->getNumStreams(), 1)); + + *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); + pipeline->addTransform(std::make_shared(is_except, header)); + + processors = collector.detachProcessors(); + return pipeline; +} + +void IntersectOrExceptStep::describePipeline(FormatSettings & settings) const +{ + IQueryPlanStep::describePipeline(processors, settings); +} + +} diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h new file mode 100644 index 00000000000..d2b515bb1c4 --- /dev/null +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace DB +{ + +class IntersectOrExceptStep : public IQueryPlanStep +{ +public: + /// max_threads is used to limit the number of threads for result pipeline. + IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, Block result_header, size_t max_threads_ = 0); + + String getName() const override { return is_except ? 
"Except" : "Intersect"; } + + QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; + + void describePipeline(FormatSettings & settings) const override; +private: + bool is_except; + Block header; + size_t max_threads; + Processors processors; +}; + +} + diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp new file mode 100644 index 00000000000..199498bf762 --- /dev/null +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -0,0 +1,192 @@ +#include + +namespace DB +{ +IntersectOrExceptTransform::IntersectOrExceptTransform(bool is_except_, const Block & header_) + : IProcessor(InputPorts(2, header_), {header_}), is_except(is_except_), output(outputs.front()) +{ + const Names & columns = header_.getNames(); + size_t num_columns = columns.empty() ? header_.columns() : columns.size(); + + key_columns_pos.reserve(columns.size()); + for (size_t i = 0; i < num_columns; ++i) + { + auto pos = columns.empty() ? i : header_.getPositionByName(columns[i]); + + const auto & col = header_.getByPosition(pos).column; + + if (!(col && isColumnConst(*col))) + key_columns_pos.emplace_back(pos); + } +} + +IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() +{ + /// Check can output. + if (output.isFinished()) + { + for (auto & in : inputs) + in.close(); + return Status::Finished; + } + + if (!output.canPush()) + { + if (inputs.front().isFinished()) + { + inputs.back().setNotNeeded(); + } + else + { + inputs.front().setNotNeeded(); + } + return Status::PortFull; + } + + /// Output if has data. 
+ if (current_output_chunk) + { + output.push(std::move(current_output_chunk)); + } + + if (push_empty_chunk) + { + output.push(std::move(empty_chunk)); + push_empty_chunk = false; + } + + if (finished_second_input) + { + if (inputs.front().isFinished()) + { + output.finish(); + return Status::Finished; + } + } + else if (inputs.back().isFinished()) + { + finished_second_input = true; + } + + InputPort & input = finished_second_input ? inputs.front() : inputs.back(); + + /// Check can input. + if (!has_input) + { + input.setNeeded(); + if (!input.hasData()) + { + return Status::NeedData; + } + + current_input_chunk = input.pull(); + has_input = true; + } + + return Status::Ready; +} + +void IntersectOrExceptTransform::work() +{ + if (!finished_second_input) + { + accumulate(std::move(current_input_chunk)); + } + else + { + filter(current_input_chunk); + current_output_chunk = std::move(current_input_chunk); + } + + has_input = false; +} + +template +void IntersectOrExceptTransform::addToSet(Method & method, const ColumnRawPtrs & columns, size_t rows, SetVariants & variants) const +{ + typename Method::State state(columns, key_sizes, nullptr); + + for (size_t i = 0; i < rows; ++i) + { + state.emplaceKey(method.data, i, variants.string_pool); + } +} + +template +size_t IntersectOrExceptTransform::buildFilter( + Method & method, const ColumnRawPtrs & columns, IColumn::Filter & filter, size_t rows, SetVariants & variants) const +{ + typename Method::State state(columns, key_sizes, nullptr); + size_t new_rows_num = 0; + + for (size_t i = 0; i < rows; ++i) + { + auto find_result = state.findKey(method.data, i, variants.string_pool); + filter[i] = is_except ? 
!find_result.isFound() : find_result.isFound(); + if (filter[i]) + ++new_rows_num; + } + return new_rows_num; +} + +void IntersectOrExceptTransform::accumulate(Chunk chunk) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + ColumnRawPtrs column_ptrs; + column_ptrs.reserve(key_columns_pos.size()); + for (auto pos : key_columns_pos) + column_ptrs.emplace_back(columns[pos].get()); + + if (data.empty()) + data.init(SetVariants::chooseMethod(column_ptrs, key_sizes)); + + switch (data.type) + { + case SetVariants::Type::EMPTY: + break; +#define M(NAME) \ + case SetVariants::Type::NAME: \ + addToSet(*data.NAME, column_ptrs, num_rows, data); \ + break; + APPLY_FOR_SET_VARIANTS(M) +#undef M + } +} + +void IntersectOrExceptTransform::filter(Chunk & chunk) +{ + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + ColumnRawPtrs column_ptrs; + column_ptrs.reserve(key_columns_pos.size()); + for (auto pos : key_columns_pos) + column_ptrs.emplace_back(columns[pos].get()); + + if (data.empty()) + data.init(SetVariants::chooseMethod(column_ptrs, key_sizes)); + + IColumn::Filter filter(num_rows); + + size_t new_rows_num = 0; + switch (data.type) + { + case SetVariants::Type::EMPTY: + break; +#define M(NAME) \ + case SetVariants::Type::NAME: \ + new_rows_num = buildFilter(*data.NAME, column_ptrs, filter, num_rows, data); \ + break; + APPLY_FOR_SET_VARIANTS(M) +#undef M + } + + for (auto & column : columns) + column = column->filter(filter, -1); + + chunk.setColumns(std::move(columns), new_rows_num); +} + +} diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.h b/src/Processors/Transforms/IntersectOrExceptTransform.h new file mode 100644 index 00000000000..ebe73fdeb26 --- /dev/null +++ b/src/Processors/Transforms/IntersectOrExceptTransform.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class IntersectOrExceptTransform : public IProcessor +{ +public: + 
IntersectOrExceptTransform(bool is_except_, const Block & header_); + + Status prepare() override; + void work() override; + + String getName() const override { return is_except ? "Except" : "Intersect"; } + +private: + + bool push_empty_chunk = false; + Chunk empty_chunk; + + bool is_except; + ColumnNumbers key_columns_pos; + SetVariants data; + Sizes key_sizes; + Chunk current_input_chunk; + Chunk current_output_chunk; + bool finished_second_input = false; + bool has_input = false; + OutputPort & output; + + void accumulate(Chunk chunk); + void filter(Chunk & chunk); + template + void addToSet( + Method & method, + const ColumnRawPtrs & key_columns, + size_t rows, + SetVariants & variants) const; + + template + size_t buildFilter( + Method & method, + const ColumnRawPtrs & columns, + IColumn::Filter & filter, + size_t rows, + SetVariants & variants) const; +}; + +} From 566b5335ddc505d66ad1fa4db95497751f692cd0 Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Sun, 30 May 2021 20:58:08 +0300 Subject: [PATCH 02/32] Add ANY, ALL --- src/Parsers/ExpressionListParsers.cpp | 129 +++++++++++++++++++++++++- src/Parsers/ExpressionListParsers.h | 16 +++- 2 files changed, 141 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index eec79edc05e..e9705843b6e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -9,8 +9,7 @@ namespace DB -{ - +{ const char * ParserMultiplicativeExpression::operators[] = { @@ -56,6 +55,19 @@ const char * ParserComparisonExpression::operators[] = nullptr }; +const char * ParserComparisonWithSubqueryExpression::operators[] = +{ + "==", "equals", + "!=", "notEquals", + "<>", "notEquals", + "<=", "lessOrEquals", + ">=", "greaterOrEquals", + "<", "less", + ">", "greater", + "=", "equals", + nullptr +}; + const char * ParserComparisonExpression::overlapping_operators_to_skip[] = { "IN PARTITION", @@ -359,6 +371,119 @@ bool 
ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } +bool ParserComparisonWithSubqueryExpression::modifySubquery(String operator_name, ASTPtr subquery_node, bool is_any) +{ + ASTPtr select_with_union_node = subquery_node->children[0]; + if (select_with_union_node->children[0]->children.size() != 1) + return false; + ASTPtr select_node = select_with_union_node->children[0]->children[0]; + ASTPtr exp_list = select_node->children[0]; + auto function = std::make_shared(); + function->arguments = exp_list; + function->children.push_back(exp_list); + + ASTPtr new_exp_list = std::make_shared(); + new_exp_list->children.push_back(function); + + if (operator_name == "greaterOrEquals" || operator_name == "greater") + { + function->name = is_any ? "min" : "max"; + select_node->children[0] = new_exp_list; + return true; + } + + if (operator_name == "lessOrEquals" || operator_name == "less") + { + function->name = is_any ? "max" : "min"; + select_node->children[0] = new_exp_list; + return true; + } + return false; +} + +bool ParserComparisonWithSubqueryExpression::addFunctionIn(String operator_name, ASTPtr & node, bool is_any) +{ + + auto function_in = std::make_shared(); + auto exp_list_in = std::make_shared(); + exp_list_in->children.push_back(node->children[0]->children[0]); + exp_list_in->children.push_back(node->children[0]->children[1]); + function_in->name = "in"; + function_in->children.push_back(exp_list_in); + function_in->arguments = exp_list_in; + + if (operator_name == "equals" && is_any) + { + node = function_in; + return true; + } + + if (operator_name == "notEquals" && !is_any) + { + auto function_not = std::make_shared(); + auto exp_list_not = std::make_shared(); + exp_list_not->children.push_back(function_in); + function_not->name = "not"; + function_not->children.push_back(exp_list_not); + function_not->arguments = exp_list_not; + node = function_not; + return true; + } + return false; +} + +bool 
ParserComparisonWithSubqueryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + Pos begin = pos; + ASTPtr elem; + if (!elem_parser.parse(pos, elem, expected)) + return next_parser.parse(pos, node, expected); + + /// try to find any of the valid operators + const char ** it; + for (it = operators; *it; it += 2) + if (parseOperator(pos, *it, expected)) + break; + + if (!*it) + { + pos = begin; + return next_parser.parse(pos, node, expected); + } + + bool is_any = true; + if (!ParserKeyword("ANY").ignore(pos, expected)) + { + is_any = false; + if (!ParserKeyword("ALL").ignore(pos, expected)) + { + pos = begin; + return next_parser.parse(pos, node, expected); + } + } + + ASTPtr subquery_node; + if (!ParserSubquery().parse(pos, subquery_node, expected)) + return false; + + /// the first argument of the function is the previous element, the second is the next one + String operator_name = it[1]; + + /// the function corresponding to the operator + auto function = std::make_shared(); + + /// function arguments + auto exp_list = std::make_shared(); + exp_list->children.push_back(elem); + exp_list->children.push_back(subquery_node); + + function->name = operator_name; + function->arguments = exp_list; + function->children.push_back(exp_list); + node = function; + return modifySubquery(operator_name, subquery_node, is_any) || addFunctionIn(operator_name, node, is_any); +} + bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserToken symbol1(TokenType::QuestionMark); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 3a65141d751..acdeac29543 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -364,14 +364,26 @@ protected: } }; +class ParserComparisonWithSubqueryExpression : public IParserBase +{ +private: + static const char * operators[]; + ParserComparisonExpression next_parser; + ParserConcatExpression elem_parser; + 
static bool addFunctionIn(String operator_name, ASTPtr & node, bool is_any); + static bool modifySubquery(String operator_name, ASTPtr subquery_node, bool is_any); +protected: + const char * getName() const override { return "comparison with ANY/ALL expression"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; /** Parser for nullity checking with IS (NOT) NULL. */ class ParserNullityChecking : public IParserBase { private: - ParserComparisonExpression elem_parser; - + ParserComparisonWithSubqueryExpression elem_parser; protected: const char * getName() const override { return "nullity checking"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; From 1c2e9ba0038e5c18de10530e9f591833b5e0bd62 Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Wed, 2 Jun 2021 02:18:15 +0300 Subject: [PATCH 03/32] Fix bug in parser --- src/Parsers/ExpressionListParsers.cpp | 6 +++--- src/Parsers/ExpressionListParsers.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e9705843b6e..a228dc6617b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -9,7 +9,7 @@ namespace DB -{ +{ const char * ParserMultiplicativeExpression::operators[] = { @@ -447,8 +447,8 @@ bool ParserComparisonWithSubqueryExpression::parseImpl(Pos & pos, ASTPtr & node, if (!*it) { - pos = begin; - return next_parser.parse(pos, node, expected); + node = elem; + return true; } bool is_any = true; diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index acdeac29543..29335e72b2a 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -369,7 +369,7 @@ class ParserComparisonWithSubqueryExpression : public IParserBase private: static const char * operators[]; ParserComparisonExpression next_parser; - ParserConcatExpression elem_parser; + 
ParserBetweenExpression elem_parser; static bool addFunctionIn(String operator_name, ASTPtr & node, bool is_any); static bool modifySubquery(String operator_name, ASTPtr subquery_node, bool is_any); protected: From 289c5d3ad680c6e07ef189eed9ea30417acdac65 Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Fri, 4 Jun 2021 05:56:02 +0300 Subject: [PATCH 04/32] Add SingleValueOrNull aggregate function --- .../AggregateFunctionMinMaxAny.h | 58 ++++++++++++++++++- .../AggregateFunctionSingleValueOrNull.cpp | 27 +++++++++ .../registerAggregateFunctions.cpp | 2 + 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index de1f4fad296..6791b98e7e3 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -5,8 +5,10 @@ #include #include +#include #include #include +#include #include #include @@ -628,6 +630,60 @@ struct AggregateFunctionAnyLastData : Data static const char * name() { return "anyLast"; } }; +template +struct AggregateFunctionSingleValueOrNullData : Data +{ + using Self = AggregateFunctionSingleValueOrNullData; + + bool first_value = true; + bool is_null = false; + + bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena) + { + if (first_value) + { + first_value = false; + this->change(column, row_num, arena); + return true; + } + else if (!this->isEqualTo(column, row_num)) + { + is_null = true; + } + return false; + } + + bool changeIfBetter(const Self & to, Arena * arena) + { + if (first_value) + { + first_value = false; + this->change(to, arena); + return true; + } + else if (!this->isEqualTo(to)) + { + is_null = true; + } + return false; + } + + void insertResultInto(IColumn & to) const + { + if (is_null || first_value) + { + to.insertDefault(); + } + else + { + ColumnNullable & 
col = typeid_cast(to); + col.getNullMapColumn().insertDefault(); + this->Data::insertResultInto(col.getNestedColumn()); + } + } + + static const char * name() { return "singleValueOrNull"; } +}; /** Implement 'heavy hitters' algorithm. * Selects most frequent value if its frequency is more than 50% in each thread of execution. @@ -722,7 +778,7 @@ public: DataTypePtr getReturnType() const override { - return type; + return Data::name() == "singleValueOrNull" ? std::make_shared(type) : type; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override diff --git a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp new file mode 100644 index 00000000000..cd897dfcf6e --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp @@ -0,0 +1,27 @@ +#include +#include +#include +#include "registerAggregateFunctions.h" + + +namespace DB +{ +struct Settings; + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionSingleValueOrNull(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); +} + +} + +void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory & factory) +{ + factory.registerFunction("singleValueOrNull", createAggregateFunctionSingleValueOrNull); +} + + +} diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index db6d8eb75bc..6fb373a1ce8 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -48,6 +48,7 @@ void registerAggregateFunctionRankCorrelation(AggregateFunctionFactory &); void registerAggregateFunctionMannWhitney(AggregateFunctionFactory &); void 
registerAggregateFunctionWelchTTest(AggregateFunctionFactory &); void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &); +void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -111,6 +112,7 @@ void registerAggregateFunctions() registerAggregateFunctionMannWhitney(factory); registerAggregateFunctionWelchTTest(factory); registerAggregateFunctionStudentTTest(factory); + registerAggregateFunctionSingleValueOrNull(factory); registerWindowFunctions(factory); From a524feb73272b172c03a22c1540fd117ea93c4a9 Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Thu, 3 Jun 2021 05:19:07 +0300 Subject: [PATCH 05/32] Rewrite ALL/ANY parser --- src/Parsers/ExpressionListParsers.cpp | 282 ++++++++++++++------------ src/Parsers/ExpressionListParsers.h | 21 +- 2 files changed, 158 insertions(+), 145 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index a228dc6617b..acb2e35a84e 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1,8 +1,12 @@ #include +#include #include #include #include +#include +#include +#include #include #include #include @@ -55,19 +59,6 @@ const char * ParserComparisonExpression::operators[] = nullptr }; -const char * ParserComparisonWithSubqueryExpression::operators[] = -{ - "==", "equals", - "!=", "notEquals", - "<>", "notEquals", - "<=", "lessOrEquals", - ">=", "greaterOrEquals", - "<", "less", - ">", "greater", - "=", "equals", - nullptr -}; - const char * ParserComparisonExpression::overlapping_operators_to_skip[] = { "IN PARTITION", @@ -180,6 +171,158 @@ static bool parseOperator(IParser::Pos & pos, const char * op, Expected & expect } } +enum class SubqueryFunctionType +{ + NONE, + ANY, + ALL +}; + +static bool modifyAST(String operator_name, std::shared_ptr & function, SubqueryFunctionType 
type) +{ + // = ANY --> IN, != ALL --> NOT IN + if ((operator_name == "equals" && type == SubqueryFunctionType::ANY) + || (operator_name == "notEquals" && type == SubqueryFunctionType::ALL)) + { + function->name = "in"; + if (operator_name == "notEquals") + { + auto function_not = std::make_shared(); + auto exp_list_not = std::make_shared(); + exp_list_not->children.push_back(function); + function_not->name = "not"; + function_not->children.push_back(exp_list_not); + function_not->arguments = exp_list_not; + function = function_not; + } + return true; + } + + // subquery --> (SELECT aggregate_function(*) FROM subquery) + auto aggregate_function = std::make_shared(); + auto aggregate_function_exp_list = std::make_shared(); + aggregate_function_exp_list ->children.push_back(std::make_shared()); + aggregate_function->arguments = aggregate_function_exp_list; + aggregate_function->children.push_back(aggregate_function_exp_list); + + ASTPtr subquery_node = function->children[0]->children[1]; + auto select_query = std::make_shared(); + auto tables_in_select = std::make_shared(); + auto tables_in_select_element = std::make_shared(); + auto table_expression = std::make_shared(); + table_expression->subquery = subquery_node; + table_expression->children.push_back(subquery_node); + tables_in_select_element->table_expression = table_expression; + tables_in_select_element->children.push_back(table_expression); + tables_in_select->children.push_back(tables_in_select_element); + auto select_exp_list = std::make_shared(); + select_exp_list->children.push_back(aggregate_function); + select_query->children.push_back(select_exp_list); + select_query->children.push_back(tables_in_select); + select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_exp_list)); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select)); + + auto select_with_union_query = std::make_shared(); + auto list_of_selects = std::make_shared(); + 
list_of_selects->children.push_back(select_query); + select_with_union_query->list_of_selects = list_of_selects; + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + auto new_subquery = std::make_shared(); + new_subquery->children.push_back(select_with_union_query); + function->children[0]->children.pop_back(); + function->children[0]->children.push_back(new_subquery); + + if (operator_name == "greaterOrEquals" || operator_name == "greater") + { + aggregate_function->name = type == SubqueryFunctionType::ANY ? "min" : "max"; + return true; + } + if (operator_name == "lessOrEquals" || operator_name == "less") + { + aggregate_function->name = type == SubqueryFunctionType::ANY ? "max" : "min"; + return true; + } + if (operator_name == "equals" || operator_name == "notEquals") + { + aggregate_function->name = "singleValueOrNull"; + return true; + } + return false; +} + +bool ParserComparisonExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + bool first = true; + + auto current_depth = pos.depth; + while (true) + { + if (first) + { + ASTPtr elem; + if (!elem_parser.parse(pos, elem, expected)) + return false; + + node = elem; + first = false; + } + else + { + /// try to find any of the valid operators + const char ** it; + Expected stub; + for (it = overlapping_operators_to_skip; *it; ++it) + if (ParserKeyword{*it}.checkWithoutMoving(pos, stub)) + break; + + if (*it) + break; + + for (it = operators; *it; it += 2) + if (parseOperator(pos, *it, expected)) + break; + + if (!*it) + break; + + /// the function corresponding to the operator + auto function = std::make_shared(); + + /// function arguments + auto exp_list = std::make_shared(); + + ASTPtr elem; + SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; + if (ParserKeyword("ANY").ignore(pos, expected)) + subquery_function_type = SubqueryFunctionType::ANY; + else if (ParserKeyword("ALL").ignore(pos, expected)) + subquery_function_type = 
SubqueryFunctionType::ALL; + else if (!elem_parser.parse(pos, elem, expected)) + return false; + + if (subquery_function_type != SubqueryFunctionType::NONE && !ParserSubquery().parse(pos, elem, expected)) + return false; + + /// the first argument of the function is the previous element, the second is the next one + function->name = it[1]; + function->arguments = exp_list; + function->children.push_back(exp_list); + + exp_list->children.push_back(node); + exp_list->children.push_back(elem); + + if (subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function->name, function, subquery_function_type)) + return false; + + pos.increaseDepth(); + node = function; + } + } + + pos.depth = current_depth; + return true; +} bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -371,119 +514,6 @@ bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return true; } -bool ParserComparisonWithSubqueryExpression::modifySubquery(String operator_name, ASTPtr subquery_node, bool is_any) -{ - ASTPtr select_with_union_node = subquery_node->children[0]; - if (select_with_union_node->children[0]->children.size() != 1) - return false; - ASTPtr select_node = select_with_union_node->children[0]->children[0]; - ASTPtr exp_list = select_node->children[0]; - auto function = std::make_shared(); - function->arguments = exp_list; - function->children.push_back(exp_list); - - ASTPtr new_exp_list = std::make_shared(); - new_exp_list->children.push_back(function); - - if (operator_name == "greaterOrEquals" || operator_name == "greater") - { - function->name = is_any ? "min" : "max"; - select_node->children[0] = new_exp_list; - return true; - } - - if (operator_name == "lessOrEquals" || operator_name == "less") - { - function->name = is_any ? 
"max" : "min"; - select_node->children[0] = new_exp_list; - return true; - } - return false; -} - -bool ParserComparisonWithSubqueryExpression::addFunctionIn(String operator_name, ASTPtr & node, bool is_any) -{ - - auto function_in = std::make_shared(); - auto exp_list_in = std::make_shared(); - exp_list_in->children.push_back(node->children[0]->children[0]); - exp_list_in->children.push_back(node->children[0]->children[1]); - function_in->name = "in"; - function_in->children.push_back(exp_list_in); - function_in->arguments = exp_list_in; - - if (operator_name == "equals" && is_any) - { - node = function_in; - return true; - } - - if (operator_name == "notEquals" && !is_any) - { - auto function_not = std::make_shared(); - auto exp_list_not = std::make_shared(); - exp_list_not->children.push_back(function_in); - function_not->name = "not"; - function_not->children.push_back(exp_list_not); - function_not->arguments = exp_list_not; - node = function_not; - return true; - } - return false; -} - -bool ParserComparisonWithSubqueryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - Pos begin = pos; - ASTPtr elem; - if (!elem_parser.parse(pos, elem, expected)) - return next_parser.parse(pos, node, expected); - - /// try to find any of the valid operators - const char ** it; - for (it = operators; *it; it += 2) - if (parseOperator(pos, *it, expected)) - break; - - if (!*it) - { - node = elem; - return true; - } - - bool is_any = true; - if (!ParserKeyword("ANY").ignore(pos, expected)) - { - is_any = false; - if (!ParserKeyword("ALL").ignore(pos, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - } - - ASTPtr subquery_node; - if (!ParserSubquery().parse(pos, subquery_node, expected)) - return false; - - /// the first argument of the function is the previous element, the second is the next one - String operator_name = it[1]; - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// 
function arguments - auto exp_list = std::make_shared(); - exp_list->children.push_back(elem); - exp_list->children.push_back(subquery_node); - - function->name = operator_name; - function->arguments = exp_list; - function->children.push_back(exp_list); - node = function; - return modifySubquery(operator_name, subquery_node, is_any) || addFunctionIn(operator_name, node, is_any); -} - bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserToken symbol1(TokenType::QuestionMark); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 29335e72b2a..82fd0eefc8e 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -353,28 +353,11 @@ class ParserComparisonExpression : public IParserBase private: static const char * operators[]; static const char * overlapping_operators_to_skip[]; - ParserLeftAssociativeBinaryOperatorList operator_parser {operators, overlapping_operators_to_skip, std::make_unique()}; + ParserBetweenExpression elem_parser; protected: const char * getName() const override{ return "comparison expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - -class ParserComparisonWithSubqueryExpression : public IParserBase -{ -private: - static const char * operators[]; - ParserComparisonExpression next_parser; - ParserBetweenExpression elem_parser; - static bool addFunctionIn(String operator_name, ASTPtr & node, bool is_any); - static bool modifySubquery(String operator_name, ASTPtr subquery_node, bool is_any); -protected: - const char * getName() const override { return "comparison with ANY/ALL expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; @@ -383,7 +366,7 @@ protected: class ParserNullityChecking : public IParserBase { private: - ParserComparisonWithSubqueryExpression elem_parser; + 
ParserComparisonExpression elem_parser; protected: const char * getName() const override { return "nullity checking"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; From 15843723ec9b8149e876a7414629b72c68744bb7 Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Sat, 5 Jun 2021 02:30:57 +0300 Subject: [PATCH 06/32] Fix ALL/ANY implementation --- src/Parsers/ExpressionListParsers.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index acb2e35a84e..920a31199d4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -243,9 +243,23 @@ static bool modifyAST(String operator_name, std::shared_ptr & funct aggregate_function->name = type == SubqueryFunctionType::ANY ? "max" : "min"; return true; } + + // = ALL --> IN (SELECT singleValueOrNull(*) FROM subquery) + // != ANY --> NOT IN (SELECT singleValueOrNull(*) FROM subquery) if (operator_name == "equals" || operator_name == "notEquals") { aggregate_function->name = "singleValueOrNull"; + function->name = "in"; + if (operator_name == "notEquals") + { + auto function_not = std::make_shared(); + auto exp_list_not = std::make_shared(); + exp_list_not->children.push_back(function); + function_not->name = "not"; + function_not->children.push_back(exp_list_not); + function_not->arguments = exp_list_not; + function = function_not; + } return true; } return false; From 1d46e443e29dd77c401b6345f483d750530c1bc2 Mon Sep 17 00:00:00 2001 From: Kirill Ershov Date: Sat, 5 Jun 2021 03:58:22 +0300 Subject: [PATCH 07/32] Fix INTERSECT/EXCEPT parser --- src/Parsers/ASTIntersectOrExcept.cpp | 2 +- src/Parsers/ExpressionElementParsers.cpp | 2 ++ src/Parsers/ParserIntersectOrExcept.cpp | 11 +++-------- src/Processors/QueryPlan/IntersectOrExceptStep.cpp | 2 +- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Parsers/ASTIntersectOrExcept.cpp 
b/src/Parsers/ASTIntersectOrExcept.cpp index 073d63963a9..a05d7ee86c9 100644 --- a/src/Parsers/ASTIntersectOrExcept.cpp +++ b/src/Parsers/ASTIntersectOrExcept.cpp @@ -20,7 +20,7 @@ void ASTIntersectOrExcept::formatQueryImpl(const FormatSettings & settings, Form children[0]->formatImpl(settings, state, frame); std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") - << (is_except ? "EXCEPT" : "INTERSECT ") + << (is_except ? "EXCEPT" : "INTERSECT") << (settings.hilite ? hilite_none : "") << settings.nl_or_ws; children[1]->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 75a341a543d..d08ce2a215b 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1697,6 +1697,8 @@ const char * ParserAlias::restricted_keywords[] = "WHERE", "WINDOW", "WITH", + "INTERSECT", + "EXCEPT", nullptr }; diff --git a/src/Parsers/ParserIntersectOrExcept.cpp b/src/Parsers/ParserIntersectOrExcept.cpp index a82b8c2b06b..6d5da54fa38 100644 --- a/src/Parsers/ParserIntersectOrExcept.cpp +++ b/src/Parsers/ParserIntersectOrExcept.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { @@ -17,7 +17,7 @@ bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & exp auto ast = std::make_shared(); ast->is_except = false; - if (!ParserSelectQuery().parse(pos, left_node, expected) && !ParserSubquery().parse(pos, left_node, expected)) + if (!ParserSelectWithUnionQuery().parse(pos, left_node, expected) && !ParserSubquery().parse(pos, left_node, expected)) return false; if (!intersect_keyword.ignore(pos)) @@ -32,14 +32,9 @@ bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } } - if (!ParserSelectQuery().parse(pos, right_node, expected) && !ParserSubquery().parse(pos, 
right_node, expected)) + if (!ParserSelectWithUnionQuery().parse(pos, right_node, expected) && !ParserSubquery().parse(pos, right_node, expected)) return false; - if (const auto * ast_subquery = left_node->as()) - left_node = ast_subquery->children.at(0); - if (const auto * ast_subquery = right_node->as()) - right_node = ast_subquery->children.at(0); - ast->children.push_back(left_node); ast->children.push_back(right_node); diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index d0a820339d7..28f34bda5db 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -15,7 +15,7 @@ IntersectOrExceptStep::IntersectOrExceptStep(bool is_except_, DataStreams input_ output_stream = DataStream{.header = header}; } -QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & ) +QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) { auto pipeline = std::make_unique(); QueryPipelineProcessorsCollector collector(*pipeline, this); From 1a607af42920eb18016526b5682f00484ef888a5 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 3 Aug 2021 17:53:18 +0300 Subject: [PATCH 08/32] add test for parsing maps --- src/DataTypes/DataTypeMap.cpp | 2 +- .../0_stateless/02002_parse_map_int_key.reference | 1 + tests/queries/0_stateless/02002_parse_map_int_key.sql | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02002_parse_map_int_key.reference create mode 100644 tests/queries/0_stateless/02002_parse_map_int_key.sql diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 8fd375aa86e..b0bf459b4ca 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -79,7 +79,7 @@ void DataTypeMap::assertKeyType() const std::string 
DataTypeMap::doGetName() const { WriteBufferFromOwnString s; - s << "Map(" << key_type->getName() << "," << value_type->getName() << ")"; + s << "Map(" << key_type->getName() << ", " << value_type->getName() << ")"; return s.str(); } diff --git a/tests/queries/0_stateless/02002_parse_map_int_key.reference b/tests/queries/0_stateless/02002_parse_map_int_key.reference new file mode 100644 index 00000000000..dc02589d4bc --- /dev/null +++ b/tests/queries/0_stateless/02002_parse_map_int_key.reference @@ -0,0 +1 @@ +{1:2,3:4,5:6,7:8} {'2021-05-20':1,'2021-05-21':2,'2021-05-22':3,'2021-05-23':4} diff --git a/tests/queries/0_stateless/02002_parse_map_int_key.sql b/tests/queries/0_stateless/02002_parse_map_int_key.sql new file mode 100644 index 00000000000..ecd2a090975 --- /dev/null +++ b/tests/queries/0_stateless/02002_parse_map_int_key.sql @@ -0,0 +1,11 @@ +SET allow_experimental_map_type = 1; + +DROP TABLE IF EXISTS t_map_int_key; +CREATE TABLE t_map_int_key (m1 Map(UInt32, UInt32), m2 Map(Date, UInt32)) ENGINE = Memory; + +INSERT INTO t_map_int_key FORMAT CSV "{1:2, 3: 4, 5 :6, 7 : 8}","{'2021-05-20':1, '2021-05-21': 2, '2021-05-22' :3, '2021-05-23' : 4}" +; + +SELECT m1, m2 FROM t_map_int_key; + +DROP TABLE t_map_int_key; From 5abe33e1a38cd44e585ec106468edc475d7170cb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 8 Aug 2021 20:12:12 +0300 Subject: [PATCH 09/32] Better --- .../InterpreterIntersectOrExcept.cpp | 45 ++++++++++------- .../InterpreterIntersectOrExcept.h | 18 ++++--- .../QueryPlan/IntersectOrExceptStep.cpp | 50 +++++++++++++++++-- .../QueryPlan/IntersectOrExceptStep.h | 6 ++- .../Transforms/IntersectOrExceptTransform.cpp | 40 +++++++-------- .../Transforms/IntersectOrExceptTransform.h | 30 +++++------ 6 files changed, 119 insertions(+), 70 deletions(-) diff --git a/src/Interpreters/InterpreterIntersectOrExcept.cpp b/src/Interpreters/InterpreterIntersectOrExcept.cpp index c85bd29e16f..c85c39824d8 100644 --- 
a/src/Interpreters/InterpreterIntersectOrExcept.cpp +++ b/src/Interpreters/InterpreterIntersectOrExcept.cpp @@ -8,6 +8,8 @@ #include #include #include +#include + namespace DB { @@ -17,16 +19,20 @@ namespace ErrorCodes extern const int INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH; } -InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ptr_, ContextPtr context_) - : query_ptr(query_ptr_), context(Context::createCopy(context_)) +InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ptr, ContextPtr context_) + : context(Context::createCopy(context_)) + , is_except(query_ptr->as()->is_except) { ASTIntersectOrExcept * ast = query_ptr->as(); + size_t num_children = ast->children.size(); + if (!num_children) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no children in ASTIntersectOrExceptQuery"); + nested_interpreters.resize(num_children); + for (size_t i = 0; i < num_children; ++i) - { nested_interpreters[i] = buildCurrentChildInterpreter(ast->children[i]); - } Blocks headers(num_children); for (size_t query_num = 0; query_num < num_children; ++query_num) @@ -35,8 +41,7 @@ InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ result_header = getCommonHeader(headers); } - -Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) +Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) const { size_t num_selects = headers.size(); Block common_header = headers.front(); @@ -45,16 +50,12 @@ Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) for (size_t query_num = 1; query_num < num_selects; ++query_num) { if (headers[query_num].columns() != num_columns) - throw Exception( - "Different number of columns in " - + toString(query_ptr->as()->is_except ? 
"EXCEPT" : "INTERSECT") - + " elements:\n" + common_header.dumpNames() + "\nand\n" - + headers[query_num].dumpNames() + "\n", - ErrorCodes::INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH); + throw Exception(ErrorCodes::INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH, + "Different number of columns in {} elements:\n {} \nand\n {}", + getName(), common_header.dumpNames(), headers[query_num].dumpNames()); } std::vector columns(num_selects); - for (size_t column_num = 0; column_num < num_columns; ++column_num) { for (size_t i = 0; i < num_selects; ++i) @@ -67,7 +68,6 @@ Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) return common_header; } - std::unique_ptr InterpreterIntersectOrExcept::buildCurrentChildInterpreter(const ASTPtr & ast_ptr_) { @@ -80,7 +80,6 @@ InterpreterIntersectOrExcept::buildCurrentChildInterpreter(const ASTPtr & ast_pt void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) { size_t num_plans = nested_interpreters.size(); - std::vector> plans(num_plans); DataStreams data_streams(num_plans); @@ -88,12 +87,23 @@ void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) { plans[i] = std::make_unique(); nested_interpreters[i]->buildQueryPlan(*plans[i]); + + if (!blocksHaveEqualStructure(plans[i]->getCurrentDataStream().header, result_header)) + { + auto actions_dag = ActionsDAG::makeConvertingActions( + plans[i]->getCurrentDataStream().header.getColumnsWithTypeAndName(), + result_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto converting_step = std::make_unique(plans[i]->getCurrentDataStream(), std::move(actions_dag)); + converting_step->setStepDescription("Conversion before UNION"); + plans[i]->addStep(std::move(converting_step)); + } + data_streams[i] = plans[i]->getCurrentDataStream(); } auto max_threads = context->getSettingsRef().max_threads; - auto step = std::make_unique( - query_ptr->as()->is_except, std::move(data_streams), result_header, 
max_threads); + auto step = std::make_unique(is_except, std::move(data_streams), max_threads); query_plan.unitePlans(std::move(step), std::move(plans)); } @@ -113,4 +123,5 @@ BlockIO InterpreterIntersectOrExcept::execute() return res; } + } diff --git a/src/Interpreters/InterpreterIntersectOrExcept.h b/src/Interpreters/InterpreterIntersectOrExcept.h index 0069dc02f1d..34a58c0c05a 100644 --- a/src/Interpreters/InterpreterIntersectOrExcept.h +++ b/src/Interpreters/InterpreterIntersectOrExcept.h @@ -16,20 +16,22 @@ class InterpreterIntersectOrExcept : public IInterpreter public: InterpreterIntersectOrExcept(const ASTPtr & query_ptr_, ContextPtr context_); - /// Builds QueryPlan for current query. - virtual void buildQueryPlan(QueryPlan & query_plan); - BlockIO execute() override; private: - ASTPtr query_ptr; - ContextPtr context; - Block result_header; - std::vector> nested_interpreters; - Block getCommonHeader(const Blocks & headers); + String getName() const { return is_except ? "EXCEPT" : "INTERSECT"; } + + Block getCommonHeader(const Blocks & headers) const; std::unique_ptr buildCurrentChildInterpreter(const ASTPtr & ast_ptr_); + + void buildQueryPlan(QueryPlan & query_plan); + + ContextPtr context; + bool is_except; + Block result_header; + std::vector> nested_interpreters; }; } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index 28f34bda5db..f04885f4640 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -1,18 +1,36 @@ #include +#include #include #include #include +#include #include #include + namespace DB { -IntersectOrExceptStep::IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, Block result_header, size_t max_threads_) - : is_except(is_except_), header(std::move(result_header)), max_threads(max_threads_) +Block IntersectOrExceptStep::checkHeaders(const DataStreams & input_streams_) const +{ + if 
(input_streams_.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform {} on empty set of query plan steps", getName()); + + Block res = input_streams_.front().header; + for (const auto & stream : input_streams_) + assertBlocksHaveEqualStructure(stream.header, res, "IntersectOrExceptStep"); + + return res; +} + +IntersectOrExceptStep::IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, size_t max_threads_) + : is_except(is_except_), header(checkHeaders(input_streams_)), max_threads(max_threads_) { input_streams = std::move(input_streams_); - output_stream = DataStream{.header = header}; + if (input_streams.size() == 1) + output_stream = input_streams.front(); + else + output_stream = DataStream{.header = header}; } QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) @@ -20,8 +38,30 @@ QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, auto pipeline = std::make_unique(); QueryPipelineProcessorsCollector collector(*pipeline, this); - pipelines[0]->addTransform(std::make_shared(header, pipelines[0]->getNumStreams(), 1)); - pipelines[1]->addTransform(std::make_shared(header, pipelines[1]->getNumStreams(), 1)); + if (pipelines.empty()) + { + pipeline->init(Pipe(std::make_shared(output_stream->header))); + processors = collector.detachProcessors(); + return pipeline; + } + + for (auto & cur_pipeline : pipelines) + { + /// Just in case. 
+ if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header)) + { + auto converting_dag = ActionsDAG::makeConvertingActions( + cur_pipeline->getHeader().getColumnsWithTypeAndName(), + getOutputStream().header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + + auto converting_actions = std::make_shared(std::move(converting_dag)); + cur_pipeline->addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(cur_header, converting_actions); + }); + } + } *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); pipeline->addTransform(std::make_shared(is_except, header)); diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index d2b515bb1c4..7938a9adad5 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -8,14 +8,17 @@ class IntersectOrExceptStep : public IQueryPlanStep { public: /// max_threads is used to limit the number of threads for result pipeline. - IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, Block result_header, size_t max_threads_ = 0); + IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, size_t max_threads_ = 0); String getName() const override { return is_except ? 
"Except" : "Intersect"; } QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; void describePipeline(FormatSettings & settings) const override; + private: + Block checkHeaders(const DataStreams & input_streams_) const; + bool is_except; Block header; size_t max_threads; @@ -23,4 +26,3 @@ private: }; } - diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index 199498bf762..e5e8ff705c8 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -1,9 +1,12 @@ #include + namespace DB { + IntersectOrExceptTransform::IntersectOrExceptTransform(bool is_except_, const Block & header_) - : IProcessor(InputPorts(2, header_), {header_}), is_except(is_except_), output(outputs.front()) + : IProcessor(InputPorts(2, header_), {header_}) + , is_except(is_except_) { const Names & columns = header_.getNames(); size_t num_columns = columns.empty() ? header_.columns() : columns.size(); @@ -11,18 +14,17 @@ IntersectOrExceptTransform::IntersectOrExceptTransform(bool is_except_, const Bl key_columns_pos.reserve(columns.size()); for (size_t i = 0; i < num_columns; ++i) { - auto pos = columns.empty() ? i : header_.getPositionByName(columns[i]); - - const auto & col = header_.getByPosition(pos).column; - - if (!(col && isColumnConst(*col))) - key_columns_pos.emplace_back(pos); + auto pos = columns.empty() ? i + : header_.getPositionByName(columns[i]); + key_columns_pos.emplace_back(pos); } } + IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() { - /// Check can output. 
+ auto & output = outputs.front(); + if (output.isFinished()) { for (auto & in : inputs) @@ -32,14 +34,8 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() if (!output.canPush()) { - if (inputs.front().isFinished()) - { - inputs.back().setNotNeeded(); - } - else - { - inputs.front().setNotNeeded(); - } + for (auto & input : inputs) + input.setNotNeeded(); return Status::PortFull; } @@ -74,10 +70,9 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() if (!has_input) { input.setNeeded(); + if (!input.hasData()) - { return Status::NeedData; - } current_input_chunk = input.pull(); has_input = true; @@ -86,6 +81,7 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() return Status::Ready; } + void IntersectOrExceptTransform::work() { if (!finished_second_input) @@ -101,17 +97,17 @@ void IntersectOrExceptTransform::work() has_input = false; } + template void IntersectOrExceptTransform::addToSet(Method & method, const ColumnRawPtrs & columns, size_t rows, SetVariants & variants) const { typename Method::State state(columns, key_sizes, nullptr); for (size_t i = 0; i < rows; ++i) - { state.emplaceKey(method.data, i, variants.string_pool); - } } + template size_t IntersectOrExceptTransform::buildFilter( Method & method, const ColumnRawPtrs & columns, IColumn::Filter & filter, size_t rows, SetVariants & variants) const @@ -129,6 +125,7 @@ size_t IntersectOrExceptTransform::buildFilter( return new_rows_num; } + void IntersectOrExceptTransform::accumulate(Chunk chunk) { auto num_rows = chunk.getNumRows(); @@ -136,6 +133,7 @@ void IntersectOrExceptTransform::accumulate(Chunk chunk) ColumnRawPtrs column_ptrs; column_ptrs.reserve(key_columns_pos.size()); + for (auto pos : key_columns_pos) column_ptrs.emplace_back(columns[pos].get()); @@ -155,6 +153,7 @@ void IntersectOrExceptTransform::accumulate(Chunk chunk) } } + void IntersectOrExceptTransform::filter(Chunk & chunk) { auto num_rows = chunk.getNumRows(); @@ 
-162,6 +161,7 @@ void IntersectOrExceptTransform::filter(Chunk & chunk) ColumnRawPtrs column_ptrs; column_ptrs.reserve(key_columns_pos.size()); + for (auto pos : key_columns_pos) column_ptrs.emplace_back(columns[pos].get()); diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.h b/src/Processors/Transforms/IntersectOrExceptTransform.h index ebe73fdeb26..3c2b9581d6d 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.h +++ b/src/Processors/Transforms/IntersectOrExceptTransform.h @@ -12,17 +12,18 @@ class IntersectOrExceptTransform : public IProcessor public: IntersectOrExceptTransform(bool is_except_, const Block & header_); - Status prepare() override; - void work() override; - String getName() const override { return is_except ? "Except" : "Intersect"; } +protected: + Status prepare() override; + + void work() override; + private: + bool is_except; bool push_empty_chunk = false; Chunk empty_chunk; - - bool is_except; ColumnNumbers key_columns_pos; SetVariants data; Sizes key_sizes; @@ -30,24 +31,17 @@ private: Chunk current_output_chunk; bool finished_second_input = false; bool has_input = false; - OutputPort & output; void accumulate(Chunk chunk); + void filter(Chunk & chunk); - template - void addToSet( - Method & method, - const ColumnRawPtrs & key_columns, - size_t rows, - SetVariants & variants) const; template - size_t buildFilter( - Method & method, - const ColumnRawPtrs & columns, - IColumn::Filter & filter, - size_t rows, - SetVariants & variants) const; + void addToSet(Method & method, const ColumnRawPtrs & key_columns, size_t rows, SetVariants & variants) const; + + template + size_t buildFilter(Method & method, const ColumnRawPtrs & columns, + IColumn::Filter & filter, size_t rows, SetVariants & variants) const; }; } From 0f67acf6482ae1b5d736a01144255c8daf60e7eb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 8 Aug 2021 20:16:22 +0300 Subject: [PATCH 10/32] Add test --- ...02004_intersect_except_operators.reference | 25 
+++++++++++++++++++ .../02004_intersect_except_operators.sql | 8 ++++++ 2 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/02004_intersect_except_operators.reference create mode 100644 tests/queries/0_stateless/02004_intersect_except_operators.sql diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference new file mode 100644 index 00000000000..763a5872cce --- /dev/null +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -0,0 +1,25 @@ +-- { echo } +select 1 intersect select 1; +1 +select 2 intersect select 1; +select 1 except select 1; +select 2 except select 1; +2 +select number from numbers(5, 5) intersect select number from numbers(20); +5 +6 +7 +8 +9 +select number from numbers(10) except select number from numbers(5); +5 +6 +7 +8 +9 +select number, number+10 from numbers(12) except select number+5, number+15 from numbers(10); +0 10 +1 11 +2 12 +3 13 +4 14 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql new file mode 100644 index 00000000000..d0416fd899e --- /dev/null +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -0,0 +1,8 @@ +-- { echo } +select 1 intersect select 1; +select 2 intersect select 1; +select 1 except select 1; +select 2 except select 1; +select number from numbers(5, 5) intersect select number from numbers(20); +select number from numbers(10) except select number from numbers(5); +select number, number+10 from numbers(12) except select number+5, number+15 from numbers(10); From b90dc1017bfdc4e826cd3181209415bbb0b7e754 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 9 Aug 2021 16:43:10 +0300 Subject: [PATCH 11/32] fix tests --- ...map_add_map_subtract_on_map_type.reference | 54 +++++++++---------- .../01550_type_map_formats.reference | 6 +-- 2 files changed, 30 insertions(+), 30 
deletions(-) diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference index 96bafc2c79c..304f7407cf5 100644 --- a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference @@ -18,38 +18,38 @@ {1:3,2:2,8:2} {1:3,2:2,9:2} {1:3,2:2,10:2} -{1:2,2:2} Map(UInt8,UInt64) -{1:2,2:2} Map(UInt16,UInt64) -{1:2,2:2} Map(UInt32,UInt64) -{1:2,2:2} Map(UInt64,UInt64) -{1:2,2:2} Map(UInt128,UInt128) -{1:2,2:2} Map(UInt256,UInt256) -{1:2,2:2} Map(Int16,UInt64) -{1:2,2:2} Map(Int16,Int64) -{1:2,2:2} Map(Int32,Int64) -{1:2,2:2} Map(Int64,Int64) -{1:2,2:2} Map(Int128,Int128) -{1:2,2:2} Map(Int256,Int256) -{1:3.300000023841858,2:2} Map(UInt8,Float64) -{1:3.3000000000000003,2:2} Map(UInt8,Float64) +{1:2,2:2} Map(UInt8, UInt64) +{1:2,2:2} Map(UInt16, UInt64) +{1:2,2:2} Map(UInt32, UInt64) +{1:2,2:2} Map(UInt64, UInt64) +{1:2,2:2} Map(UInt128, UInt128) +{1:2,2:2} Map(UInt256, UInt256) +{1:2,2:2} Map(Int16, UInt64) +{1:2,2:2} Map(Int16, Int64) +{1:2,2:2} Map(Int32, Int64) +{1:2,2:2} Map(Int64, Int64) +{1:2,2:2} Map(Int128, Int128) +{1:2,2:2} Map(Int256, Int256) +{1:3.300000023841858,2:2} Map(UInt8, Float64) +{1:3.3000000000000003,2:2} Map(UInt8, Float64) {'a':1,'b':2} {'a':1,'b':1,'c':1} {'a':1,'b':1,'d':1} -{'a':1,'b':2} Map(String,UInt64) -{'a':1,'b':1,'c':1} Map(String,UInt64) -{'a':1,'b':1,'d':1} Map(String,UInt64) +{'a':1,'b':2} Map(String, UInt64) +{'a':1,'b':1,'c':1} Map(String, UInt64) +{'a':1,'b':1,'d':1} Map(String, UInt64) {'a':1,'b':2} {'a':1,'b':1,'c':1} {'a':1,'b':1,'d':1} -{'a':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) -{'b':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) -{'a':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) -{'b':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) -{'00000000-89ab-cdef-0123-456789abcdef':2} Map(UUID,Int64) -{'11111111-89ab-cdef-0123-456789abcdef':4} 
Map(UUID,Int64) +{'a':2} Map(Enum16(\'a\' = 1, \'b\' = 2), Int64) +{'b':2} Map(Enum16(\'a\' = 1, \'b\' = 2), Int64) +{'a':2} Map(Enum8(\'a\' = 1, \'b\' = 2), Int64) +{'b':2} Map(Enum8(\'a\' = 1, \'b\' = 2), Int64) +{'00000000-89ab-cdef-0123-456789abcdef':2} Map(UUID, Int64) +{'11111111-89ab-cdef-0123-456789abcdef':4} Map(UUID, Int64) {1:0,2:0} Map(UInt8,UInt64) -{1:18446744073709551615,2:18446744073709551615} Map(UInt8,UInt64) +{1:18446744073709551615,2:18446744073709551615} Map(UInt8, UInt64) {1:-1,2:-1} Map(UInt8,Int64) -{1:-1.0999999761581423,2:0} Map(UInt8,Float64) -{1:-1,2:-1} Map(UInt8,Int64) -{1:-2,2:-2,3:1} Map(UInt8,Int64) +{1:-1.0999999761581423,2:0} Map(UInt8, Float64) +{1:-1,2:-1} Map(UInt8, Int64) +{1:-2,2:-2,3:1} Map(UInt8, Int64) diff --git a/tests/queries/0_stateless/01550_type_map_formats.reference b/tests/queries/0_stateless/01550_type_map_formats.reference index ca081db75a2..998473ef63a 100644 --- a/tests/queries/0_stateless/01550_type_map_formats.reference +++ b/tests/queries/0_stateless/01550_type_map_formats.reference @@ -4,15 +4,15 @@ JSON [ { "name": "m", - "type": "Map(String,UInt32)" + "type": "Map(String, UInt32)" }, { "name": "m1", - "type": "Map(String,Date)" + "type": "Map(String, Date)" }, { "name": "m2", - "type": "Map(String,Array(UInt32))" + "type": "Map(String, Array(UInt32))" } ], From 4eb4dd97d4d1869d713c901b053fcb18bdb0ae23 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 10 Aug 2021 09:24:12 +0300 Subject: [PATCH 12/32] Support not only 2 inputs, support any size sequence of intersect / except together --- .../InterpreterIntersectOrExcept.cpp | 13 ++-- .../InterpreterIntersectOrExcept.h | 6 +- src/Parsers/ASTIntersectOrExcept.cpp | 55 +++++++++++-- src/Parsers/ASTIntersectOrExcept.h | 16 +++- src/Parsers/ParserIntersectOrExcept.cpp | 61 ++++++++++----- .../QueryPlan/IntersectOrExceptStep.cpp | 7 +- .../QueryPlan/IntersectOrExceptStep.h | 10 ++- .../Transforms/IntersectOrExceptTransform.cpp | 78 +++++++++++++------ 
.../Transforms/IntersectOrExceptTransform.h | 21 +++-- ...02004_intersect_except_operators.reference | 30 +++++++ .../02004_intersect_except_operators.sql | 10 +++ 11 files changed, 234 insertions(+), 73 deletions(-) diff --git a/src/Interpreters/InterpreterIntersectOrExcept.cpp b/src/Interpreters/InterpreterIntersectOrExcept.cpp index c85c39824d8..d706ab02bfd 100644 --- a/src/Interpreters/InterpreterIntersectOrExcept.cpp +++ b/src/Interpreters/InterpreterIntersectOrExcept.cpp @@ -21,18 +21,21 @@ namespace ErrorCodes InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ptr, ContextPtr context_) : context(Context::createCopy(context_)) - , is_except(query_ptr->as()->is_except) { ASTIntersectOrExcept * ast = query_ptr->as(); + auto children = ast->list_of_selects->children; + modes = ast->list_of_modes; + if (modes.size() + 1 != children.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of modes and number of children are not consistent"); - size_t num_children = ast->children.size(); + size_t num_children = children.size(); if (!num_children) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no children in ASTIntersectOrExceptQuery"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No children in ASTIntersectOrExceptQuery"); nested_interpreters.resize(num_children); for (size_t i = 0; i < num_children; ++i) - nested_interpreters[i] = buildCurrentChildInterpreter(ast->children[i]); + nested_interpreters[i] = buildCurrentChildInterpreter(children.at(i)); Blocks headers(num_children); for (size_t query_num = 0; query_num < num_children; ++query_num) @@ -103,7 +106,7 @@ void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) } auto max_threads = context->getSettingsRef().max_threads; - auto step = std::make_unique(is_except, std::move(data_streams), max_threads); + auto step = std::make_unique(std::move(data_streams), modes, max_threads); query_plan.unitePlans(std::move(step), std::move(plans)); } diff 
--git a/src/Interpreters/InterpreterIntersectOrExcept.h b/src/Interpreters/InterpreterIntersectOrExcept.h index 34a58c0c05a..8021a25df06 100644 --- a/src/Interpreters/InterpreterIntersectOrExcept.h +++ b/src/Interpreters/InterpreterIntersectOrExcept.h @@ -3,6 +3,8 @@ #include #include #include +#include + namespace DB { @@ -19,7 +21,7 @@ public: BlockIO execute() override; private: - String getName() const { return is_except ? "EXCEPT" : "INTERSECT"; } + String getName() const { return "IntersectExcept"; } Block getCommonHeader(const Blocks & headers) const; @@ -29,9 +31,9 @@ private: void buildQueryPlan(QueryPlan & query_plan); ContextPtr context; - bool is_except; Block result_header; std::vector> nested_interpreters; + ASTIntersectOrExcept::Modes modes; }; } diff --git a/src/Parsers/ASTIntersectOrExcept.cpp b/src/Parsers/ASTIntersectOrExcept.cpp index a05d7ee86c9..7d92055646c 100644 --- a/src/Parsers/ASTIntersectOrExcept.cpp +++ b/src/Parsers/ASTIntersectOrExcept.cpp @@ -1,5 +1,7 @@ #include #include +#include + namespace DB { @@ -8,21 +10,58 @@ ASTPtr ASTIntersectOrExcept::clone() const { auto res = std::make_shared(*this); res->children.clear(); - res->children.push_back(children[0]->clone()); - res->children.push_back(children[1]->clone()); - res->is_except = is_except; + + res->list_of_selects = list_of_selects->clone(); + res->children.push_back(res->list_of_selects); + res->list_of_modes = list_of_modes; + cloneOutputOptions(*res); return res; } void ASTIntersectOrExcept::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - children[0]->formatImpl(settings, state, frame); std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") - << (is_except ? "EXCEPT" : "INTERSECT") - << (settings.hilite ? 
hilite_none : "") << settings.nl_or_ws; - children[1]->formatImpl(settings, state, frame); + + auto mode_to_str = [&](auto mode) + { + if (mode == Mode::INTERSECT) + return "INTERSECT"; + else + return "EXCEPT"; + }; + + for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it) + { + if (it != list_of_selects->children.begin()) + { + settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") + << mode_to_str(list_of_modes[it - list_of_selects->children.begin() - 1]) + << (settings.hilite ? hilite_none : ""); + } + + if (auto * node = (*it)->as()) + { + settings.ostr << settings.nl_or_ws << indent_str; + + if (node->list_of_selects->children.size() == 1) + { + (node->list_of_selects->children.at(0))->formatImpl(settings, state, frame); + } + else + { + auto sub_query = std::make_shared(); + sub_query->children.push_back(*it); + sub_query->formatImpl(settings, state, frame); + } + } + else + { + if (it != list_of_selects->children.begin()) + settings.ostr << settings.nl_or_ws; + (*it)->formatImpl(settings, state, frame); + } + } } } diff --git a/src/Parsers/ASTIntersectOrExcept.h b/src/Parsers/ASTIntersectOrExcept.h index a02cb9f7d77..0eb8ab3b113 100644 --- a/src/Parsers/ASTIntersectOrExcept.h +++ b/src/Parsers/ASTIntersectOrExcept.h @@ -9,10 +9,22 @@ namespace DB class ASTIntersectOrExcept : public ASTQueryWithOutput { public: - String getID(char) const override { return is_except ? 
"Except" : "Intersect"; } + String getID(char) const override { return "IntersectExceptQuery"; } + ASTPtr clone() const override; + void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - bool is_except; + + enum class Mode + { + INTERSECT, + EXCEPT + }; + + using Modes = std::vector; + + ASTPtr list_of_selects; + Modes list_of_modes; }; } diff --git a/src/Parsers/ParserIntersectOrExcept.cpp b/src/Parsers/ParserIntersectOrExcept.cpp index 6d5da54fa38..fd1511b8af6 100644 --- a/src/Parsers/ParserIntersectOrExcept.cpp +++ b/src/Parsers/ParserIntersectOrExcept.cpp @@ -4,41 +4,62 @@ #include #include #include +#include +#include + namespace DB { + bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword intersect_keyword("INTERSECT"); ParserKeyword except_keyword("EXCEPT"); - ASTPtr left_node; - ASTPtr right_node; - auto ast = std::make_shared(); - ast->is_except = false; + ASTs elements; + ASTIntersectOrExcept::Modes modes; - if (!ParserSelectWithUnionQuery().parse(pos, left_node, expected) && !ParserSubquery().parse(pos, left_node, expected)) - return false; - - if (!intersect_keyword.ignore(pos)) + auto parse_element = [&]() -> bool { - if (!except_keyword.ignore(pos)) - { + ASTPtr element; + if (!ParserSelectWithUnionQuery().parse(pos, element, expected) && !ParserSubquery().parse(pos, element, expected)) return false; - } - else - { - ast->is_except = true; - } - } - if (!ParserSelectWithUnionQuery().parse(pos, right_node, expected) && !ParserSubquery().parse(pos, right_node, expected)) + elements.push_back(element); + return true; + }; + + auto parse_separator = [&]() -> bool + { + if (!intersect_keyword.ignore(pos)) + { + if (!except_keyword.ignore(pos)) + return false; + + modes.emplace_back(ASTIntersectOrExcept::Mode::EXCEPT); + return true; + } + + modes.emplace_back(ASTIntersectOrExcept::Mode::INTERSECT); + return true; + }; + + if 
(!ParserUnionList::parseUtil(pos, parse_element, parse_separator)) return false; - ast->children.push_back(left_node); - ast->children.push_back(right_node); + if (modes.empty()) + return false; + + auto list_node = std::make_shared(); + list_node->children = std::move(elements); + + auto intersect_or_except_ast = std::make_shared(); + + node = intersect_or_except_ast; + intersect_or_except_ast->list_of_selects = list_node; + intersect_or_except_ast->children.push_back(intersect_or_except_ast->list_of_selects); + intersect_or_except_ast->list_of_modes = modes; - node = ast; return true; } diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index f04885f4640..b1b5c1b8813 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -23,8 +23,8 @@ Block IntersectOrExceptStep::checkHeaders(const DataStreams & input_streams_) co return res; } -IntersectOrExceptStep::IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, size_t max_threads_) - : is_except(is_except_), header(checkHeaders(input_streams_)), max_threads(max_threads_) +IntersectOrExceptStep::IntersectOrExceptStep(DataStreams input_streams_, const Modes & modes_, size_t max_threads_) + : header(checkHeaders(input_streams_)), modes(modes_), max_threads(max_threads_) { input_streams = std::move(input_streams_); if (input_streams.size() == 1) @@ -63,8 +63,9 @@ QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, } } + std::cerr << "size: " << input_streams.size() << std::endl; *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); - pipeline->addTransform(std::make_shared(is_except, header)); + pipeline->addTransform(std::make_shared(header, modes)); processors = collector.detachProcessors(); return pipeline; diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h 
index 7938a9adad5..4eceb820153 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -1,16 +1,20 @@ #pragma once #include +#include + namespace DB { class IntersectOrExceptStep : public IQueryPlanStep { +using Modes = ASTIntersectOrExcept::Modes; + public: /// max_threads is used to limit the number of threads for result pipeline. - IntersectOrExceptStep(bool is_except_, DataStreams input_streams_, size_t max_threads_ = 0); + IntersectOrExceptStep(DataStreams input_streams_, const Modes & modes_, size_t max_threads_ = 0); - String getName() const override { return is_except ? "Except" : "Intersect"; } + String getName() const override { return "IntersectExcept"; } QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; @@ -19,8 +23,8 @@ public: private: Block checkHeaders(const DataStreams & input_streams_) const; - bool is_except; Block header; + Modes modes; size_t max_threads; Processors processors; }; diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index e5e8ff705c8..70a86855992 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -4,9 +4,11 @@ namespace DB { -IntersectOrExceptTransform::IntersectOrExceptTransform(bool is_except_, const Block & header_) - : IProcessor(InputPorts(2, header_), {header_}) - , is_except(is_except_) +IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, const Modes & modes_) + : IProcessor(InputPorts(modes_.size() + 1, header_), {header_}) + , modes(modes_) + , first_input(inputs.begin()) + , second_input(std::next(inputs.begin())) { const Names & columns = header_.getNames(); size_t num_columns = columns.empty() ? 
header_.columns() : columns.size(); @@ -14,8 +16,7 @@ IntersectOrExceptTransform::IntersectOrExceptTransform(bool is_except_, const Bl key_columns_pos.reserve(columns.size()); for (size_t i = 0; i < num_columns; ++i) { - auto pos = columns.empty() ? i - : header_.getPositionByName(columns[i]); + auto pos = columns.empty() ? i : header_.getPositionByName(columns[i]); key_columns_pos.emplace_back(pos); } } @@ -40,7 +41,7 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() } /// Output if has data. - if (current_output_chunk) + if (current_output_chunk && second_input == inputs.end()) { output.push(std::move(current_output_chunk)); } @@ -53,28 +54,50 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() if (finished_second_input) { - if (inputs.front().isFinished()) + if (first_input->isFinished() || (more && !current_input_chunk)) { - output.finish(); - return Status::Finished; + std::advance(second_input, 1); + + if (second_input == inputs.end()) + { + if (current_output_chunk) + { + output.push(std::move(current_output_chunk)); + } + output.finish(); + return Status::Finished; + } + else + { + more = true; + data.reset(); + finished_second_input = false; + ++current_operator_pos; + } } } - else if (inputs.back().isFinished()) + else if (second_input->isFinished()) { finished_second_input = true; } - InputPort & input = finished_second_input ? inputs.front() : inputs.back(); + InputPort & input = finished_second_input ? *first_input : *second_input; /// Check can input. 
if (!has_input) { - input.setNeeded(); + if (finished_second_input && more) + { + current_input_chunk = std::move(current_output_chunk); + } + else + { + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + current_input_chunk = input.pull(); + } - if (!input.hasData()) - return Status::NeedData; - - current_input_chunk = input.pull(); has_input = true; } @@ -84,6 +107,9 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() void IntersectOrExceptTransform::work() { + if (!data) + data.emplace(); + if (!finished_second_input) { accumulate(std::move(current_input_chunk)); @@ -118,7 +144,7 @@ size_t IntersectOrExceptTransform::buildFilter( for (size_t i = 0; i < rows; ++i) { auto find_result = state.findKey(method.data, i, variants.string_pool); - filter[i] = is_except ? !find_result.isFound() : find_result.isFound(); + filter[i] = modes[current_operator_pos] == ASTIntersectOrExcept::Mode::EXCEPT ? !find_result.isFound() : find_result.isFound(); if (filter[i]) ++new_rows_num; } @@ -137,16 +163,17 @@ void IntersectOrExceptTransform::accumulate(Chunk chunk) for (auto pos : key_columns_pos) column_ptrs.emplace_back(columns[pos].get()); - if (data.empty()) - data.init(SetVariants::chooseMethod(column_ptrs, key_sizes)); + if (data->empty()) + data->init(SetVariants::chooseMethod(column_ptrs, key_sizes)); - switch (data.type) + auto & data_set = *data; + switch (data->type) { case SetVariants::Type::EMPTY: break; #define M(NAME) \ case SetVariants::Type::NAME: \ - addToSet(*data.NAME, column_ptrs, num_rows, data); \ + addToSet(*data_set.NAME, column_ptrs, num_rows, data_set); \ break; APPLY_FOR_SET_VARIANTS(M) #undef M @@ -165,19 +192,20 @@ void IntersectOrExceptTransform::filter(Chunk & chunk) for (auto pos : key_columns_pos) column_ptrs.emplace_back(columns[pos].get()); - if (data.empty()) - data.init(SetVariants::chooseMethod(column_ptrs, key_sizes)); + if (data->empty()) + data->init(SetVariants::chooseMethod(column_ptrs, 
key_sizes)); IColumn::Filter filter(num_rows); size_t new_rows_num = 0; - switch (data.type) + auto & data_set = *data; + switch (data->type) { case SetVariants::Type::EMPTY: break; #define M(NAME) \ case SetVariants::Type::NAME: \ - new_rows_num = buildFilter(*data.NAME, column_ptrs, filter, num_rows, data); \ + new_rows_num = buildFilter(*data_set.NAME, column_ptrs, filter, num_rows, data_set); \ break; APPLY_FOR_SET_VARIANTS(M) #undef M diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.h b/src/Processors/Transforms/IntersectOrExceptTransform.h index 3c2b9581d6d..5b62ef65d8d 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.h +++ b/src/Processors/Transforms/IntersectOrExceptTransform.h @@ -3,16 +3,20 @@ #include #include #include +#include + namespace DB { class IntersectOrExceptTransform : public IProcessor { -public: - IntersectOrExceptTransform(bool is_except_, const Block & header_); +using Modes = ASTIntersectOrExcept::Modes; - String getName() const override { return is_except ? 
"Except" : "Intersect"; } +public: + IntersectOrExceptTransform(const Block & header_, const Modes & modes); + + String getName() const override { return "IntersectExcept"; } protected: Status prepare() override; @@ -20,15 +24,22 @@ protected: void work() override; private: - bool is_except; + Modes modes; + InputPorts::iterator first_input; + InputPorts::iterator second_input; + size_t current_operator_pos = 0; bool push_empty_chunk = false; Chunk empty_chunk; + ColumnNumbers key_columns_pos; - SetVariants data; + std::optional data; Sizes key_sizes; + Chunk current_input_chunk; Chunk current_output_chunk; + bool more = false; + bool finished_second_input = false; bool has_input = false; diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index 763a5872cce..d17216a5ec4 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -23,3 +23,33 @@ select number, number+10 from numbers(12) except select number+5, number+15 from 2 12 3 13 4 14 +select 1 except select 2 intersect select 1; +1 +select 1 except select 2 intersect select 2; +select 1 intersect select 1 except select 2; +1 +select 1 intersect select 1 except select 1; +select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 1; +1 +select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2; +select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20); +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index d0416fd899e..971aa262070 100644 --- 
a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -3,6 +3,16 @@ select 1 intersect select 1; select 2 intersect select 1; select 1 except select 1; select 2 except select 1; + select number from numbers(5, 5) intersect select number from numbers(20); select number from numbers(10) except select number from numbers(5); select number, number+10 from numbers(12) except select number+5, number+15 from numbers(10); + +select 1 except select 2 intersect select 1; +select 1 except select 2 intersect select 2; +select 1 intersect select 1 except select 2; +select 1 intersect select 1 except select 1; +select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 1; +select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2; + +select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20); From 2306fbe9be189d9066eae32609d80a59585656da Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 10 Aug 2021 10:23:18 +0300 Subject: [PATCH 13/32] Better --- .../InterpreterIntersectOrExcept.cpp | 11 ++--- .../InterpreterIntersectOrExcept.h | 4 +- src/Parsers/ASTIntersectOrExcept.cpp | 8 ++-- src/Parsers/ASTIntersectOrExcept.h | 6 +-- ...t.cpp => ParserIntersectOrExceptQuery.cpp} | 17 ++++--- ...xcept.h => ParserIntersectOrExceptQuery.h} | 2 +- src/Parsers/ParserQueryWithOutput.cpp | 6 +-- .../QueryPlan/IntersectOrExceptStep.cpp | 14 +++--- .../QueryPlan/IntersectOrExceptStep.h | 8 ++-- .../Transforms/IntersectOrExceptTransform.cpp | 48 +++++++++---------- .../Transforms/IntersectOrExceptTransform.h | 13 ++--- 11 files changed, 66 insertions(+), 71 deletions(-) rename src/Parsers/{ParserIntersectOrExcept.cpp => ParserIntersectOrExceptQuery.cpp} (73%) rename src/Parsers/{ParserIntersectOrExcept.h => ParserIntersectOrExceptQuery.h} 
(80%) diff --git a/src/Interpreters/InterpreterIntersectOrExcept.cpp b/src/Interpreters/InterpreterIntersectOrExcept.cpp index d706ab02bfd..52dcb24ed27 100644 --- a/src/Interpreters/InterpreterIntersectOrExcept.cpp +++ b/src/Interpreters/InterpreterIntersectOrExcept.cpp @@ -23,15 +23,10 @@ InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ : context(Context::createCopy(context_)) { ASTIntersectOrExcept * ast = query_ptr->as(); + operators = ast->list_of_operators; + auto children = ast->list_of_selects->children; - modes = ast->list_of_modes; - if (modes.size() + 1 != children.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of modes and number of children are not consistent"); - size_t num_children = children.size(); - if (!num_children) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No children in ASTIntersectOrExceptQuery"); - nested_interpreters.resize(num_children); for (size_t i = 0; i < num_children; ++i) @@ -106,7 +101,7 @@ void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) } auto max_threads = context->getSettingsRef().max_threads; - auto step = std::make_unique(std::move(data_streams), modes, max_threads); + auto step = std::make_unique(std::move(data_streams), operators, max_threads); query_plan.unitePlans(std::move(step), std::move(plans)); } diff --git a/src/Interpreters/InterpreterIntersectOrExcept.h b/src/Interpreters/InterpreterIntersectOrExcept.h index 8021a25df06..359be05db8b 100644 --- a/src/Interpreters/InterpreterIntersectOrExcept.h +++ b/src/Interpreters/InterpreterIntersectOrExcept.h @@ -21,7 +21,7 @@ public: BlockIO execute() override; private: - String getName() const { return "IntersectExcept"; } + String getName() const { return "IntersectOrExcept"; } Block getCommonHeader(const Blocks & headers) const; @@ -33,7 +33,7 @@ private: ContextPtr context; Block result_header; std::vector> nested_interpreters; - ASTIntersectOrExcept::Modes modes; + ASTIntersectOrExcept::Operators 
operators; }; } diff --git a/src/Parsers/ASTIntersectOrExcept.cpp b/src/Parsers/ASTIntersectOrExcept.cpp index 7d92055646c..33ffb76c2f7 100644 --- a/src/Parsers/ASTIntersectOrExcept.cpp +++ b/src/Parsers/ASTIntersectOrExcept.cpp @@ -13,7 +13,7 @@ ASTPtr ASTIntersectOrExcept::clone() const res->list_of_selects = list_of_selects->clone(); res->children.push_back(res->list_of_selects); - res->list_of_modes = list_of_modes; + res->list_of_operators = list_of_operators; cloneOutputOptions(*res); return res; @@ -23,9 +23,9 @@ void ASTIntersectOrExcept::formatQueryImpl(const FormatSettings & settings, Form { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - auto mode_to_str = [&](auto mode) + auto operator_to_str = [&](auto current_operator) { - if (mode == Mode::INTERSECT) + if (current_operator == Operator::INTERSECT) return "INTERSECT"; else return "EXCEPT"; @@ -36,7 +36,7 @@ void ASTIntersectOrExcept::formatQueryImpl(const FormatSettings & settings, Form if (it != list_of_selects->children.begin()) { settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") - << mode_to_str(list_of_modes[it - list_of_selects->children.begin() - 1]) + << operator_to_str(list_of_operators[it - list_of_selects->children.begin() - 1]) << (settings.hilite ? 
hilite_none : ""); } diff --git a/src/Parsers/ASTIntersectOrExcept.h b/src/Parsers/ASTIntersectOrExcept.h index 0eb8ab3b113..9adfdedc497 100644 --- a/src/Parsers/ASTIntersectOrExcept.h +++ b/src/Parsers/ASTIntersectOrExcept.h @@ -15,16 +15,16 @@ public: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - enum class Mode + enum class Operator { INTERSECT, EXCEPT }; - using Modes = std::vector; + using Operators = std::vector; ASTPtr list_of_selects; - Modes list_of_modes; + Operators list_of_operators; }; } diff --git a/src/Parsers/ParserIntersectOrExcept.cpp b/src/Parsers/ParserIntersectOrExceptQuery.cpp similarity index 73% rename from src/Parsers/ParserIntersectOrExcept.cpp rename to src/Parsers/ParserIntersectOrExceptQuery.cpp index fd1511b8af6..ef6d68f8534 100644 --- a/src/Parsers/ParserIntersectOrExcept.cpp +++ b/src/Parsers/ParserIntersectOrExceptQuery.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -11,13 +11,13 @@ namespace DB { -bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserIntersectOrExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword intersect_keyword("INTERSECT"); ParserKeyword except_keyword("EXCEPT"); ASTs elements; - ASTIntersectOrExcept::Modes modes; + ASTIntersectOrExcept::Operators operators; auto parse_element = [&]() -> bool { @@ -36,18 +36,21 @@ bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & exp if (!except_keyword.ignore(pos)) return false; - modes.emplace_back(ASTIntersectOrExcept::Mode::EXCEPT); + operators.emplace_back(ASTIntersectOrExcept::Operator::EXCEPT); return true; } - modes.emplace_back(ASTIntersectOrExcept::Mode::INTERSECT); + operators.emplace_back(ASTIntersectOrExcept::Operator::INTERSECT); return true; }; if (!ParserUnionList::parseUtil(pos, parse_element, parse_separator)) return false; - if 
(modes.empty()) + if (operators.empty() || elements.empty()) + return false; + + if (operators.size() + 1 != elements.size()) return false; auto list_node = std::make_shared(); @@ -58,7 +61,7 @@ bool ParserIntersectOrExcept::parseImpl(Pos & pos, ASTPtr & node, Expected & exp node = intersect_or_except_ast; intersect_or_except_ast->list_of_selects = list_node; intersect_or_except_ast->children.push_back(intersect_or_except_ast->list_of_selects); - intersect_or_except_ast->list_of_modes = modes; + intersect_or_except_ast->list_of_operators = operators; return true; } diff --git a/src/Parsers/ParserIntersectOrExcept.h b/src/Parsers/ParserIntersectOrExceptQuery.h similarity index 80% rename from src/Parsers/ParserIntersectOrExcept.h rename to src/Parsers/ParserIntersectOrExceptQuery.h index 61cc74cf0a9..d8ba82ba053 100644 --- a/src/Parsers/ParserIntersectOrExcept.h +++ b/src/Parsers/ParserIntersectOrExceptQuery.h @@ -4,7 +4,7 @@ namespace DB { -class ParserIntersectOrExcept : public IParserBase +class ParserIntersectOrExceptQuery : public IParserBase { protected: const char * getName() const override { return "INTERSECT or EXCEPT"; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 35355b29ebf..d7d87cac9b9 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,7 @@ namespace DB bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserShowTablesQuery show_tables_p; - ParserIntersectOrExcept intersect_p; + ParserIntersectOrExceptQuery intersect_except_p; ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; ParserDescribeTableQuery describe_table_p; @@ -55,7 +55,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool parsed = explain_p.parse(pos, query, expected) - || intersect_p.parse(pos, 
query, expected) + || intersect_except_p.parse(pos, query, expected) || select_p.parse(pos, query, expected) || show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p` || show_tables_p.parse(pos, query, expected) diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index b1b5c1b8813..76f496ba47c 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -14,17 +14,20 @@ namespace DB Block IntersectOrExceptStep::checkHeaders(const DataStreams & input_streams_) const { if (input_streams_.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform {} on empty set of query plan steps", getName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform intersect/except on empty set of query plan steps"); Block res = input_streams_.front().header; for (const auto & stream : input_streams_) - assertBlocksHaveEqualStructure(stream.header, res, "IntersectOrExceptStep"); + assertBlocksHaveEqualStructure(stream.header, res, "IntersectExceptStep"); return res; } -IntersectOrExceptStep::IntersectOrExceptStep(DataStreams input_streams_, const Modes & modes_, size_t max_threads_) - : header(checkHeaders(input_streams_)), modes(modes_), max_threads(max_threads_) +IntersectOrExceptStep::IntersectOrExceptStep( + DataStreams input_streams_ , const Operators & operators_ , size_t max_threads_) + : header(checkHeaders(input_streams_)) + , operators(operators_) + , max_threads(max_threads_) { input_streams = std::move(input_streams_); if (input_streams.size() == 1) @@ -63,9 +66,8 @@ QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, } } - std::cerr << "size: " << input_streams.size() << std::endl; *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); - pipeline->addTransform(std::make_shared(header, modes)); + 
pipeline->addTransform(std::make_shared(header, operators)); processors = collector.detachProcessors(); return pipeline; diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index 4eceb820153..914a7dce197 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -8,13 +8,13 @@ namespace DB class IntersectOrExceptStep : public IQueryPlanStep { -using Modes = ASTIntersectOrExcept::Modes; +using Operators = ASTIntersectOrExcept::Operators; public: /// max_threads is used to limit the number of threads for result pipeline. - IntersectOrExceptStep(DataStreams input_streams_, const Modes & modes_, size_t max_threads_ = 0); + IntersectOrExceptStep(DataStreams input_streams_, const Operators & operators_, size_t max_threads_ = 0); - String getName() const override { return "IntersectExcept"; } + String getName() const override { return "IntersectOrExcept"; } QueryPipelinePtr updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings & settings) override; @@ -24,7 +24,7 @@ private: Block checkHeaders(const DataStreams & input_streams_) const; Block header; - Modes modes; + Operators operators; size_t max_threads; Processors processors; }; diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index 70a86855992..68d5f6a2e5e 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -4,9 +4,13 @@ namespace DB { -IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, const Modes & modes_) - : IProcessor(InputPorts(modes_.size() + 1, header_), {header_}) - , modes(modes_) +/* + * There are always at least two inputs. Number of operators is always number of inputs minus 1. + * input1 {operator1} input2 {operator2} input3 ... 
+**/ +IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, const Operators & operators_) + : IProcessor(InputPorts(operators_.size() + 1, header_), {header_}) + , operators(operators_) , first_input(inputs.begin()) , second_input(std::next(inputs.begin())) { @@ -30,6 +34,7 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() { for (auto & in : inputs) in.close(); + return Status::Finished; } @@ -37,24 +42,13 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() { for (auto & input : inputs) input.setNotNeeded(); + return Status::PortFull; } - /// Output if has data. - if (current_output_chunk && second_input == inputs.end()) - { - output.push(std::move(current_output_chunk)); - } - - if (push_empty_chunk) - { - output.push(std::move(empty_chunk)); - push_empty_chunk = false; - } - if (finished_second_input) { - if (first_input->isFinished() || (more && !current_input_chunk)) + if (first_input->isFinished() || (use_accumulated_input && !current_input_chunk)) { std::advance(second_input, 1); @@ -64,12 +58,13 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() { output.push(std::move(current_output_chunk)); } + output.finish(); return Status::Finished; } else { - more = true; + use_accumulated_input = true; data.reset(); finished_second_input = false; ++current_operator_pos; @@ -81,20 +76,20 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() finished_second_input = true; } - InputPort & input = finished_second_input ? *first_input : *second_input; - - /// Check can input. if (!has_input) { - if (finished_second_input && more) + if (finished_second_input && use_accumulated_input) { current_input_chunk = std::move(current_output_chunk); } else { + InputPort & input = finished_second_input ? 
*first_input : *second_input; + input.setNeeded(); if (!input.hasData()) return Status::NeedData; + current_input_chunk = input.pull(); } @@ -107,9 +102,6 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() void IntersectOrExceptTransform::work() { - if (!data) - data.emplace(); - if (!finished_second_input) { accumulate(std::move(current_input_chunk)); @@ -144,7 +136,7 @@ size_t IntersectOrExceptTransform::buildFilter( for (size_t i = 0; i < rows; ++i) { auto find_result = state.findKey(method.data, i, variants.string_pool); - filter[i] = modes[current_operator_pos] == ASTIntersectOrExcept::Mode::EXCEPT ? !find_result.isFound() : find_result.isFound(); + filter[i] = operators[current_operator_pos] == ASTIntersectOrExcept::Operator::EXCEPT ? !find_result.isFound() : find_result.isFound(); if (filter[i]) ++new_rows_num; } @@ -163,6 +155,9 @@ void IntersectOrExceptTransform::accumulate(Chunk chunk) for (auto pos : key_columns_pos) column_ptrs.emplace_back(columns[pos].get()); + if (!data) + data.emplace(); + if (data->empty()) data->init(SetVariants::chooseMethod(column_ptrs, key_sizes)); @@ -192,6 +187,9 @@ void IntersectOrExceptTransform::filter(Chunk & chunk) for (auto pos : key_columns_pos) column_ptrs.emplace_back(columns[pos].get()); + if (!data) + data.emplace(); + if (data->empty()) data->init(SetVariants::chooseMethod(column_ptrs, key_sizes)); diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.h b/src/Processors/Transforms/IntersectOrExceptTransform.h index 5b62ef65d8d..6d0c3516d5d 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.h +++ b/src/Processors/Transforms/IntersectOrExceptTransform.h @@ -11,12 +11,12 @@ namespace DB class IntersectOrExceptTransform : public IProcessor { -using Modes = ASTIntersectOrExcept::Modes; +using Operators = ASTIntersectOrExcept::Operators; public: - IntersectOrExceptTransform(const Block & header_, const Modes & modes); + IntersectOrExceptTransform(const Block & header_, 
const Operators & operators); - String getName() const override { return "IntersectExcept"; } + String getName() const override { return "IntersectOrExcept"; } protected: Status prepare() override; @@ -24,22 +24,19 @@ protected: void work() override; private: - Modes modes; + Operators operators; InputPorts::iterator first_input; InputPorts::iterator second_input; size_t current_operator_pos = 0; - bool push_empty_chunk = false; - Chunk empty_chunk; - ColumnNumbers key_columns_pos; std::optional data; Sizes key_sizes; Chunk current_input_chunk; Chunk current_output_chunk; - bool more = false; + bool use_accumulated_input = false; bool finished_second_input = false; bool has_input = false; From a549e29bd4152ac45a2023c739e2e685eb9c7be4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 12 Aug 2021 14:42:51 +0300 Subject: [PATCH 14/32] Better --- src/Interpreters/InterpreterFactory.cpp | 8 +- .../InterpreterIntersectOrExcept.h | 39 --------- ...InterpreterSelectIntersectExceptQuery.cpp} | 77 ++++++++++------- .../InterpreterSelectIntersectExceptQuery.h | 45 ++++++++++ .../InterpreterSelectWithUnionQuery.cpp | 10 ++- .../SelectIntersectExceptQueryVisitor.cpp | 85 +++++++++++++++++++ .../SelectIntersectExceptQueryVisitor.h | 47 ++++++++++ src/Interpreters/executeQuery.cpp | 15 +++- ....cpp => ASTSelectIntersectExceptQuery.cpp} | 19 +++-- ...cept.h => ASTSelectIntersectExceptQuery.h} | 10 ++- src/Parsers/ParserQueryWithOutput.cpp | 4 +- ...p => ParserSelectIntersectExceptQuery.cpp} | 18 ++-- ...y.h => ParserSelectIntersectExceptQuery.h} | 2 +- src/Parsers/ParserUnionQueryElement.cpp | 5 +- .../QueryPlan/IntersectOrExceptStep.cpp | 13 +-- .../QueryPlan/IntersectOrExceptStep.h | 10 +-- .../Transforms/IntersectOrExceptTransform.cpp | 75 +++++++--------- .../Transforms/IntersectOrExceptTransform.h | 12 +-- ...02004_intersect_except_operators.reference | 25 ++++++ .../02004_intersect_except_operators.sql | 10 +++ 20 files changed, 363 insertions(+), 166 deletions(-) delete 
mode 100644 src/Interpreters/InterpreterIntersectOrExcept.h rename src/Interpreters/{InterpreterIntersectOrExcept.cpp => InterpreterSelectIntersectExceptQuery.cpp} (68%) create mode 100644 src/Interpreters/InterpreterSelectIntersectExceptQuery.h create mode 100644 src/Interpreters/SelectIntersectExceptQueryVisitor.cpp create mode 100644 src/Interpreters/SelectIntersectExceptQueryVisitor.h rename src/Parsers/{ASTIntersectOrExcept.cpp => ASTSelectIntersectExceptQuery.cpp} (76%) rename src/Parsers/{ASTIntersectOrExcept.h => ASTSelectIntersectExceptQuery.h} (55%) rename src/Parsers/{ParserIntersectOrExceptQuery.cpp => ParserSelectIntersectExceptQuery.cpp} (66%) rename src/Parsers/{ParserIntersectOrExceptQuery.h => ParserSelectIntersectExceptQuery.h} (79%) diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index e634c072841..e5b381b4d08 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include @@ -111,9 +111,9 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut ProfileEvents::increment(ProfileEvents::SelectQuery); return std::make_unique(query, context, options); } - else if (query->as()) + else if (query->as()) { - return std::make_unique(query, context); + return std::make_unique(query, context, options); } else if (query->as()) { diff --git a/src/Interpreters/InterpreterIntersectOrExcept.h b/src/Interpreters/InterpreterIntersectOrExcept.h deleted file mode 100644 index 359be05db8b..00000000000 --- a/src/Interpreters/InterpreterIntersectOrExcept.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; -class InterpreterSelectQuery; -class QueryPlan; - -class InterpreterIntersectOrExcept : public IInterpreter -{ 
-public: - InterpreterIntersectOrExcept(const ASTPtr & query_ptr_, ContextPtr context_); - - BlockIO execute() override; - -private: - String getName() const { return "IntersectOrExcept"; } - - Block getCommonHeader(const Blocks & headers) const; - - std::unique_ptr - buildCurrentChildInterpreter(const ASTPtr & ast_ptr_); - - void buildQueryPlan(QueryPlan & query_plan); - - ContextPtr context; - Block result_header; - std::vector> nested_interpreters; - ASTIntersectOrExcept::Operators operators; -}; - -} diff --git a/src/Interpreters/InterpreterIntersectOrExcept.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp similarity index 68% rename from src/Interpreters/InterpreterIntersectOrExcept.cpp rename to src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 52dcb24ed27..9e24dd6e6a0 100644 --- a/src/Interpreters/InterpreterIntersectOrExcept.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -1,8 +1,8 @@ #include #include -#include +#include #include -#include +#include #include #include #include @@ -19,27 +19,7 @@ namespace ErrorCodes extern const int INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH; } -InterpreterIntersectOrExcept::InterpreterIntersectOrExcept(const ASTPtr & query_ptr, ContextPtr context_) - : context(Context::createCopy(context_)) -{ - ASTIntersectOrExcept * ast = query_ptr->as(); - operators = ast->list_of_operators; - - auto children = ast->list_of_selects->children; - size_t num_children = children.size(); - nested_interpreters.resize(num_children); - - for (size_t i = 0; i < num_children; ++i) - nested_interpreters[i] = buildCurrentChildInterpreter(children.at(i)); - - Blocks headers(num_children); - for (size_t query_num = 0; query_num < num_children; ++query_num) - headers[query_num] = nested_interpreters[query_num]->getSampleBlock(); - - result_header = getCommonHeader(headers); -} - -Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) const +static Block 
getCommonHeader(const Blocks & headers) { size_t num_selects = headers.size(); Block common_header = headers.front(); @@ -49,8 +29,8 @@ Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) cons { if (headers[query_num].columns() != num_columns) throw Exception(ErrorCodes::INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH, - "Different number of columns in {} elements:\n {} \nand\n {}", - getName(), common_header.dumpNames(), headers[query_num].dumpNames()); + "Different number of columns in IntersectExceptQuery elements:\n {} \nand\n {}", + common_header.dumpNames(), headers[query_num].dumpNames()); } std::vector columns(num_selects); @@ -66,16 +46,53 @@ Block InterpreterIntersectOrExcept::getCommonHeader(const Blocks & headers) cons return common_header; } +InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery( + const ASTPtr & query_ptr_, + ContextPtr context_, + const SelectQueryOptions & options_) + : IInterpreterUnionOrSelectQuery(query_ptr_->clone(), context_, options_) +{ + ASTSelectIntersectExceptQuery * ast = query_ptr->as(); + final_operator = ast->final_operator; + + const auto & children = ast->children[0]->children; + size_t num_children = children.size(); + + /// AST must have been changed by the visitor. 
+ if (final_operator == Operator::UNKNOWN || num_children != 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "SelectIntersectExceptyQuery has not been normalized (number of children: {})", + num_children); + + nested_interpreters.resize(num_children); + + for (size_t i = 0; i < num_children; ++i) + nested_interpreters[i] = buildCurrentChildInterpreter(children.at(i)); + + Blocks headers(num_children); + for (size_t query_num = 0; query_num < num_children; ++query_num) + headers[query_num] = nested_interpreters[query_num]->getSampleBlock(); + + result_header = getCommonHeader(headers); +} + std::unique_ptr -InterpreterIntersectOrExcept::buildCurrentChildInterpreter(const ASTPtr & ast_ptr_) +InterpreterSelectIntersectExceptQuery::buildCurrentChildInterpreter(const ASTPtr & ast_ptr_) { if (ast_ptr_->as()) return std::make_unique(ast_ptr_, context, SelectQueryOptions()); - else + + if (ast_ptr_->as()) return std::make_unique(ast_ptr_, context, SelectQueryOptions()); + + if (ast_ptr_->as()) + return std::make_unique(ast_ptr_, context, SelectQueryOptions()); + + // if (ast_ptr_->as()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected query: {}", ast_ptr_->getID()); } -void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) +void InterpreterSelectIntersectExceptQuery::buildQueryPlan(QueryPlan & query_plan) { size_t num_plans = nested_interpreters.size(); std::vector> plans(num_plans); @@ -101,11 +118,11 @@ void InterpreterIntersectOrExcept::buildQueryPlan(QueryPlan & query_plan) } auto max_threads = context->getSettingsRef().max_threads; - auto step = std::make_unique(std::move(data_streams), operators, max_threads); + auto step = std::make_unique(std::move(data_streams), final_operator, max_threads); query_plan.unitePlans(std::move(step), std::move(plans)); } -BlockIO InterpreterIntersectOrExcept::execute() +BlockIO InterpreterSelectIntersectExceptQuery::execute() { BlockIO res; diff --git 
a/src/Interpreters/InterpreterSelectIntersectExceptQuery.h b/src/Interpreters/InterpreterSelectIntersectExceptQuery.h new file mode 100644 index 00000000000..9cbde055b0b --- /dev/null +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +class Context; +class InterpreterSelectQuery; +class QueryPlan; + +class InterpreterSelectIntersectExceptQuery : public IInterpreterUnionOrSelectQuery +{ +using Operator = ASTSelectIntersectExceptQuery::Operator; + +public: + InterpreterSelectIntersectExceptQuery( + const ASTPtr & query_ptr_, + ContextPtr context_, + const SelectQueryOptions & options_); + + BlockIO execute() override; + + Block getSampleBlock() { return result_header; } + +private: + static String getName() { return "SelectIntersectExceptQuery"; } + + std::unique_ptr + buildCurrentChildInterpreter(const ASTPtr & ast_ptr_); + + void buildQueryPlan(QueryPlan & query_plan) override; + + void ignoreWithTotals() override {} + + std::vector> nested_interpreters; + Operator final_operator; +}; + +} diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 3cf4a905d38..cd06f51cb12 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -2,8 +2,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -208,8 +210,10 @@ InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(const ASTPtr & ast { if (ast_ptr_->as()) return std::make_unique(ast_ptr_, context, options, current_required_result_column_names); - else + else if (ast_ptr_->as()) return std::make_unique(ast_ptr_, context, options, current_required_result_column_names); + else + return std::make_unique(ast_ptr_, context, options); } InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; @@ 
-225,10 +229,14 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, } if (is_subquery) + { return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); + } else + { return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); + } } diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp new file mode 100644 index 00000000000..a404bf3da40 --- /dev/null +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -0,0 +1,85 @@ +#include +#include +#include + + +namespace DB +{ + +void SelectIntersectExceptQueryMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * select_intersect_except = ast->as()) + { + std::cerr << "\n\nSelectIntersectExceptVisitor BEFORE:\n" << ast->dumpTree() << std::endl; + data.initialize(select_intersect_except); + visit(*select_intersect_except, data); + std::cerr << "\n\nSelectIntersectExceptVisitor AFTER:\n" << ast->dumpTree() << std::endl; + } +} + +void SelectIntersectExceptQueryMatcher::visit(ASTSelectIntersectExceptQuery & ast, Data & data) +{ + /* Example: select 1 intersect select 1 intersect select 1 intersect select 1 intersect select 1; + * + * --SelectIntersectExceptQuery --SelectIntersectExceptQuery + * ---ExpressionList ---ExpressionList + * ----SelectQuery ----SelectIntersectExceptQuery + * ----SelectQuery ------ExpressionList + * ----SelectQuery ---> -------SelectIntersectExceptQuery + * ----SelectQuery --------ExpressionList + * ---------SelectQuery + * ---------SelectQuery + * -------SelectQuery + * ----SelectQuery + **/ + + auto & selects = data.reversed_list_of_selects; + + if (selects.empty()) + return; + + const auto left = selects.back(); + selects.pop_back(); + const auto right = selects.back(); + selects.pop_back(); + + auto & operators = data.reversed_list_of_operators; + const
auto current_operator = operators.back(); + operators.pop_back(); + + auto list_node = std::make_shared(); + list_node->children = {left, right}; + + if (selects.empty()) + { + ast.final_operator = current_operator; + ast.children = {std::move(list_node)}; + } + else + { + auto select_intersect_except = std::make_shared(); + select_intersect_except->final_operator = {current_operator}; + select_intersect_except->children.emplace_back(std::move(list_node)); + + selects.emplace_back(std::move(select_intersect_except)); + } + + visit(ast, data); +} + +// void SelectIntersectExceptQueryVisitor::visit(ASTSelectWithUnionQuery & ast, Data & data) +// { +// auto & union_modes = ast.list_of_modes; +// ASTs selects; +// auto & select_list = ast.list_of_selects->children; +// +// +// // reverse children list +// std::reverse(selects.begin(), selects.end()); +// +// ast.is_normalized = true; +// ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL; +// +// ast.list_of_selects->children = std::move(selects); +// } +} diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.h b/src/Interpreters/SelectIntersectExceptQueryVisitor.h new file mode 100644 index 00000000000..58f3071972f --- /dev/null +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.h @@ -0,0 +1,47 @@ +#pragma once + +#include + +#include +#include + +#include +#include + + +namespace DB +{ + +class ASTFunction; + +class SelectIntersectExceptQueryMatcher +{ +public: + struct Data + { + Data() = default; + + void initialize(const ASTSelectIntersectExceptQuery * select_intersect_except) + { + reversed_list_of_selects = select_intersect_except->list_of_selects->clone()->children; + reversed_list_of_operators = select_intersect_except->list_of_operators; + + std::reverse(reversed_list_of_selects.begin(), reversed_list_of_selects.end()); + std::reverse(reversed_list_of_operators.begin(), reversed_list_of_operators.end()); + } + + ASTs reversed_list_of_selects; + ASTSelectIntersectExceptQuery::Operators 
reversed_list_of_operators; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data &); + static void visit(ASTSelectIntersectExceptQuery &, Data &); + // static void visit(ASTSelectWithUnionQuery &, Data &); +}; + +/// Visit children first. +using SelectIntersectExceptQueryVisitor + = InDepthNodeVisitor; +} diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 1b59f3bc7df..839447a90f7 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -490,9 +491,16 @@ static std::tuple executeQueryImpl( ApplyWithGlobalVisitor().visit(ast); } - /// Normalize SelectWithUnionQuery - NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; - NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); + { + /// Normalize SelectWithUnionQuery + NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); + } + + { + SelectIntersectExceptQueryVisitor::Data data; + SelectIntersectExceptQueryVisitor{data}.visit(ast); + } /// Check the limits. 
checkASTSizeLimits(*ast, settings); @@ -532,6 +540,7 @@ static std::tuple executeQueryImpl( /// reset Input callbacks if query is not INSERT SELECT context->resetInputCallbacks(); + std::cerr << "\n\nAST: " << ast->dumpTree() << std::endl; auto interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); std::shared_ptr quota; diff --git a/src/Parsers/ASTIntersectOrExcept.cpp b/src/Parsers/ASTSelectIntersectExceptQuery.cpp similarity index 76% rename from src/Parsers/ASTIntersectOrExcept.cpp rename to src/Parsers/ASTSelectIntersectExceptQuery.cpp index 33ffb76c2f7..26fd9353d5b 100644 --- a/src/Parsers/ASTIntersectOrExcept.cpp +++ b/src/Parsers/ASTSelectIntersectExceptQuery.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -6,20 +6,25 @@ namespace DB { -ASTPtr ASTIntersectOrExcept::clone() const +ASTPtr ASTSelectIntersectExceptQuery::clone() const { - auto res = std::make_shared(*this); - res->children.clear(); + auto res = std::make_shared(*this); + + res->children.clear(); + for (const auto & child : children) + res->children.push_back(child->clone()); + + if (res->list_of_selects) + res->list_of_selects = list_of_selects->clone(); - res->list_of_selects = list_of_selects->clone(); - res->children.push_back(res->list_of_selects); res->list_of_operators = list_of_operators; + res->final_operator = final_operator; cloneOutputOptions(*res); return res; } -void ASTIntersectOrExcept::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); diff --git a/src/Parsers/ASTIntersectOrExcept.h b/src/Parsers/ASTSelectIntersectExceptQuery.h similarity index 55% rename from src/Parsers/ASTIntersectOrExcept.h rename to src/Parsers/ASTSelectIntersectExceptQuery.h index 9adfdedc497..8fc5756e370 100644 --- a/src/Parsers/ASTIntersectOrExcept.h +++ b/src/Parsers/ASTSelectIntersectExceptQuery.h @@ -6,17 +6,20 @@ namespace DB { -class ASTIntersectOrExcept : public ASTQueryWithOutput +class ASTSelectIntersectExceptQuery : public ASTQueryWithOutput { public: - String getID(char) const override { return "IntersectExceptQuery"; } + String getID(char) const override { return "SelectIntersectExceptQuery"; } ASTPtr clone() const override; void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + const char * getQueryKindString() const override { return "SelectIntersectExcept"; } + enum class Operator { + UNKNOWN, INTERSECT, EXCEPT }; @@ -25,6 +28,9 @@ public: ASTPtr list_of_selects; Operators list_of_operators; + + /// Final operator after applying visitor. 
+ Operator final_operator = Operator::UNKNOWN; }; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index d7d87cac9b9..e2ab8a84cc1 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ namespace DB bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserShowTablesQuery show_tables_p; - ParserIntersectOrExceptQuery intersect_except_p; ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; ParserDescribeTableQuery describe_table_p; @@ -55,7 +54,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool parsed = explain_p.parse(pos, query, expected) - || intersect_except_p.parse(pos, query, expected) || select_p.parse(pos, query, expected) || show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p` || show_tables_p.parse(pos, query, expected) diff --git a/src/Parsers/ParserIntersectOrExceptQuery.cpp b/src/Parsers/ParserSelectIntersectExceptQuery.cpp similarity index 66% rename from src/Parsers/ParserIntersectOrExceptQuery.cpp rename to src/Parsers/ParserSelectIntersectExceptQuery.cpp index ef6d68f8534..b56598166c6 100644 --- a/src/Parsers/ParserIntersectOrExceptQuery.cpp +++ b/src/Parsers/ParserSelectIntersectExceptQuery.cpp @@ -1,9 +1,9 @@ -#include +#include #include #include #include -#include -#include +#include +#include #include #include @@ -11,18 +11,18 @@ namespace DB { -bool ParserIntersectOrExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserSelectIntersectExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword intersect_keyword("INTERSECT"); ParserKeyword except_keyword("EXCEPT"); ASTs elements; - ASTIntersectOrExcept::Operators operators; + ASTSelectIntersectExceptQuery::Operators 
operators; auto parse_element = [&]() -> bool { ASTPtr element; - if (!ParserSelectWithUnionQuery().parse(pos, element, expected) && !ParserSubquery().parse(pos, element, expected)) + if (!ParserSelectQuery().parse(pos, element, expected) && !ParserSubquery().parse(pos, element, expected)) return false; elements.push_back(element); @@ -36,11 +36,11 @@ bool ParserIntersectOrExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected if (!except_keyword.ignore(pos)) return false; - operators.emplace_back(ASTIntersectOrExcept::Operator::EXCEPT); + operators.emplace_back(ASTSelectIntersectExceptQuery::Operator::EXCEPT); return true; } - operators.emplace_back(ASTIntersectOrExcept::Operator::INTERSECT); + operators.emplace_back(ASTSelectIntersectExceptQuery::Operator::INTERSECT); return true; }; @@ -56,7 +56,7 @@ bool ParserIntersectOrExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected auto list_node = std::make_shared(); list_node->children = std::move(elements); - auto intersect_or_except_ast = std::make_shared(); + auto intersect_or_except_ast = std::make_shared(); node = intersect_or_except_ast; intersect_or_except_ast->list_of_selects = list_node; diff --git a/src/Parsers/ParserIntersectOrExceptQuery.h b/src/Parsers/ParserSelectIntersectExceptQuery.h similarity index 79% rename from src/Parsers/ParserIntersectOrExceptQuery.h rename to src/Parsers/ParserSelectIntersectExceptQuery.h index d8ba82ba053..e01785113a8 100644 --- a/src/Parsers/ParserIntersectOrExceptQuery.h +++ b/src/Parsers/ParserSelectIntersectExceptQuery.h @@ -4,7 +4,7 @@ namespace DB { -class ParserIntersectOrExceptQuery : public IParserBase +class ParserSelectIntersectExceptQuery : public IParserBase { protected: const char * getName() const override { return "INTERSECT or EXCEPT"; } diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..5abbce25930 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ 
b/src/Parsers/ParserUnionQueryElement.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -10,7 +11,9 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) + && !ParserSelectIntersectExceptQuery().parse(pos, node, expected) + && !ParserSelectQuery().parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index 76f496ba47c..e61afb5ba2a 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -1,7 +1,8 @@ +#include + #include #include #include -#include #include #include #include @@ -11,22 +12,22 @@ namespace DB { -Block IntersectOrExceptStep::checkHeaders(const DataStreams & input_streams_) const +static Block checkHeaders(const DataStreams & input_streams_) { if (input_streams_.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform intersect/except on empty set of query plan steps"); Block res = input_streams_.front().header; for (const auto & stream : input_streams_) - assertBlocksHaveEqualStructure(stream.header, res, "IntersectExceptStep"); + assertBlocksHaveEqualStructure(stream.header, res, "IntersectOrExceptStep"); return res; } IntersectOrExceptStep::IntersectOrExceptStep( - DataStreams input_streams_ , const Operators & operators_ , size_t max_threads_) + DataStreams input_streams_ , Operator operator_ , size_t max_threads_) : header(checkHeaders(input_streams_)) - , operators(operators_) + , current_operator(operator_) , max_threads(max_threads_) { input_streams = std::move(input_streams_); @@ -67,7 +68,7 @@ QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, } *pipeline = 
QueryPipeline::unitePipelines(std::move(pipelines), max_threads); - pipeline->addTransform(std::make_shared(header, operators)); + pipeline->addTransform(std::make_shared(header, current_operator)); processors = collector.detachProcessors(); return pipeline; diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index 914a7dce197..002f1b1570c 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB @@ -8,11 +8,11 @@ namespace DB class IntersectOrExceptStep : public IQueryPlanStep { -using Operators = ASTIntersectOrExcept::Operators; +using Operator = ASTSelectIntersectExceptQuery::Operator; public: /// max_threads is used to limit the number of threads for result pipeline. - IntersectOrExceptStep(DataStreams input_streams_, const Operators & operators_, size_t max_threads_ = 0); + IntersectOrExceptStep(DataStreams input_streams_, Operator operators_, size_t max_threads_ = 0); String getName() const override { return "IntersectOrExcept"; } @@ -21,10 +21,8 @@ public: void describePipeline(FormatSettings & settings) const override; private: - Block checkHeaders(const DataStreams & input_streams_) const; - Block header; - Operators operators; + Operator current_operator; size_t max_threads; Processors processors; }; diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index 68d5f6a2e5e..b16032bde8e 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -4,15 +4,14 @@ namespace DB { -/* - * There are always at least two inputs. Number of operators is always number of inputs minus 1. - * input1 {operator1} input2 {operator2} input3 ... 
-**/ -IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, const Operators & operators_) - : IProcessor(InputPorts(operators_.size() + 1, header_), {header_}) - , operators(operators_) - , first_input(inputs.begin()) - , second_input(std::next(inputs.begin())) +namespace ErrorCodes +{ + extern const int SET_SIZE_LIMIT_EXCEEDED; +} + +IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, Operator operator_) + : IProcessor(InputPorts(2, header_), {header_}) + , current_operator(operator_) { const Names & columns = header_.getNames(); size_t num_columns = columns.empty() ? header_.columns() : columns.size(); @@ -46,53 +45,33 @@ IntersectOrExceptTransform::Status IntersectOrExceptTransform::prepare() return Status::PortFull; } + if (current_output_chunk) + { + output.push(std::move(current_output_chunk)); + } + if (finished_second_input) { - if (first_input->isFinished() || (use_accumulated_input && !current_input_chunk)) + if (inputs.front().isFinished()) { - std::advance(second_input, 1); - - if (second_input == inputs.end()) - { - if (current_output_chunk) - { - output.push(std::move(current_output_chunk)); - } - - output.finish(); - return Status::Finished; - } - else - { - use_accumulated_input = true; - data.reset(); - finished_second_input = false; - ++current_operator_pos; - } + output.finish(); + return Status::Finished; } } - else if (second_input->isFinished()) + else if (inputs.back().isFinished()) { finished_second_input = true; } if (!has_input) { - if (finished_second_input && use_accumulated_input) - { - current_input_chunk = std::move(current_output_chunk); - } - else - { - InputPort & input = finished_second_input ? *first_input : *second_input; + InputPort & input = finished_second_input ? 
inputs.front() : inputs.back(); - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - - current_input_chunk = input.pull(); - } + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + current_input_chunk = input.pull(); has_input = true; } @@ -136,7 +115,7 @@ size_t IntersectOrExceptTransform::buildFilter( for (size_t i = 0; i < rows; ++i) { auto find_result = state.findKey(method.data, i, variants.string_pool); - filter[i] = operators[current_operator_pos] == ASTIntersectOrExcept::Operator::EXCEPT ? !find_result.isFound() : find_result.isFound(); + filter[i] = current_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT ? !find_result.isFound() : find_result.isFound(); if (filter[i]) ++new_rows_num; } @@ -193,10 +172,11 @@ void IntersectOrExceptTransform::filter(Chunk & chunk) if (data->empty()) data->init(SetVariants::chooseMethod(column_ptrs, key_sizes)); - IColumn::Filter filter(num_rows); - size_t new_rows_num = 0; + + IColumn::Filter filter(num_rows); auto & data_set = *data; + switch (data->type) { case SetVariants::Type::EMPTY: @@ -209,6 +189,9 @@ void IntersectOrExceptTransform::filter(Chunk & chunk) #undef M } + if (!new_rows_num) + return; + for (auto & column : columns) column = column->filter(filter, -1); diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.h b/src/Processors/Transforms/IntersectOrExceptTransform.h index 6d0c3516d5d..da1fa6a119e 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.h +++ b/src/Processors/Transforms/IntersectOrExceptTransform.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB @@ -11,10 +11,10 @@ namespace DB class IntersectOrExceptTransform : public IProcessor { -using Operators = ASTIntersectOrExcept::Operators; +using Operator = ASTSelectIntersectExceptQuery::Operator; public: - IntersectOrExceptTransform(const Block & header_, const Operators & operators); + IntersectOrExceptTransform(const Block & header_, Operator 
operators); String getName() const override { return "IntersectOrExcept"; } @@ -24,10 +24,7 @@ protected: void work() override; private: - Operators operators; - InputPorts::iterator first_input; - InputPorts::iterator second_input; - size_t current_operator_pos = 0; + Operator current_operator; ColumnNumbers key_columns_pos; std::optional data; @@ -36,7 +33,6 @@ private: Chunk current_input_chunk; Chunk current_output_chunk; - bool use_accumulated_input = false; bool finished_second_input = false; bool has_input = false; diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index d17216a5ec4..c3272a5d574 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -32,6 +32,16 @@ select 1 intersect select 1 except select 1; select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 1; 1 select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2; +select number from numbers(10) except select 5; +0 +1 +2 +3 +4 +6 +7 +8 +9 select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20); 20 21 @@ -53,3 +63,18 @@ select number from numbers(100) intersect select number from numbers(20, 60) exc 57 58 59 +with (select number from numbers(10) intersect select 5) as a select a * 10; +50 +select count() from (select number from numbers(10) except select 5); +9 +select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000)); +600000 +select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20)); +20 +select count() from (select number from 
numbers(1000000) intersect select number from numbers(200000, 600000) except select number from numbers(300000, 200000) except select number from numbers(600000, 200000)); +200000 +select 1 union all select 1 intersect select 1; +1 +1 +select 1 union all select 1 intersect select 2; +1 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index 971aa262070..722670732ac 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -15,4 +15,14 @@ select 1 intersect select 1 except select 1; select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 1; select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2; +select number from numbers(10) except select 5; select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20); + +with (select number from numbers(10) intersect select 5) as a select a * 10; +select count() from (select number from numbers(10) except select 5); +select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000)); +select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20)); +select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000) except select number from numbers(300000, 200000) except select number from numbers(600000, 200000)); + +select 1 union all select 1 intersect select 1; +select 1 union all select 1 intersect select 2; From 55650b1e6798f7a06d6fa9da145dee69c0cd2411 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 13 Aug 2021 12:57:15 +0300 Subject: [PATCH 15/32] Change 
behaviour of except, some fixes --- .../NormalizeSelectWithUnionQueryVisitor.cpp | 4 + .../SelectIntersectExceptQueryVisitor.cpp | 123 +++++++++++++++--- .../SelectIntersectExceptQueryVisitor.h | 2 +- src/Interpreters/executeQuery.cpp | 11 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 7 + src/Parsers/ASTSelectWithUnionQuery.h | 3 +- src/Parsers/ExpressionListParsers.cpp | 8 +- src/Parsers/ExpressionListParsers.h | 9 +- .../ParserSelectIntersectExceptQuery.cpp | 9 +- src/Parsers/ParserSelectWithUnionQuery.cpp | 3 +- .../QueryPlan/IntersectOrExceptStep.cpp | 2 + ...02004_intersect_except_operators.reference | 17 ++- .../02004_intersect_except_operators.sql | 6 +- 13 files changed, 164 insertions(+), 40 deletions(-) diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index 798c2f2e376..bbe1a4e048c 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB @@ -35,6 +36,9 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, ASTs selects; auto & select_list = ast.list_of_selects->children; + if (select_list.size() < 2) + return; + int i; for (i = union_modes.size() - 1; i >= 0; --i) { diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp index a404bf3da40..5926d5f6f10 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -6,14 +6,29 @@ namespace DB { +/* + * Note: there is a difference between intersect and except behaviour. + * `intersect` is supposed to be a part of last SelectQuery, i.e. 
the sequence with no parenthesis: + * select 1 union all select 2 except select 1 intersect 2 except select 2 union distinct select 5; + * is interpreted as: + * select 1 union all select 2 except (select 1 intersect 2) except select 2 union distinct select 5; + * Whereas `except` is applied to all union part like: + * (((select 1 union all select 2) except (select 1 intersect 2)) except select 2) union distinct select 5; +**/ + void SelectIntersectExceptQueryMatcher::visit(ASTPtr & ast, Data & data) { if (auto * select_intersect_except = ast->as()) { - std::cerr << "\n\nSelectIntersectExceptVisitor BEFORE:\n" << ast->dumpTree() << std::endl; + if (select_intersect_except->final_operator != ASTSelectIntersectExceptQuery::Operator::UNKNOWN) + return; + data.initialize(select_intersect_except); visit(*select_intersect_except, data); - std::cerr << "\n\nSelectIntersectExceptVisitor AFTER:\n" << ast->dumpTree() << std::endl; + } + else if (auto * select_union = ast->as()) + { + visit(*select_union, data); } } @@ -22,7 +37,7 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectIntersectExceptQuery & as /* Example: select 1 intersect select 1 intsect select 1 intersect select 1 intersect select 1; * * --SelectIntersectExceptQuery --SelectIntersectExceptQuery - * ---expressionlist ---ExpressionList + * ---ExpressionList ---ExpressionList * ----SelectQuery ----SelectIntersectExceptQuery * ----SelectQuery ------ExpressionList * ----SelectQuery ---> -------SelectIntersectExceptQuery @@ -59,7 +74,8 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectIntersectExceptQuery & as { auto select_intersect_except = std::make_shared(); select_intersect_except->final_operator = {current_operator}; - select_intersect_except->children.emplace_back(std::move(list_node)); + select_intersect_except->list_of_selects = std::move(list_node); + select_intersect_except->children.push_back(select_intersect_except->list_of_selects); 
selects.emplace_back(std::move(select_intersect_except)); } @@ -67,19 +83,88 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectIntersectExceptQuery & as visit(ast, data); } -// void SelectIntersectExceptQueryVisitor::visit(ASTSelectWithUnionQuery & ast, Data & data) -// { -// auto & union_modes = ast.list_of_modes; -// ASTs selects; -// auto & select_list = ast.list_of_selects->children; -// -// -// // reverse children list -// std::reverse(selects.begin(), selects.end()); -// -// ast.is_normalized = true; -// ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL; -// -// ast.list_of_selects->children = std::move(selects); -// } +void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Data &) +{ + /* Example: select 1 union all select 2 except select 1 except select 2 union distinct select 5; + * + * --SelectWithUnionQuery --SelectIntersectExceptQuery + * ---ExpressionList ---ExpressionList + * ----SelectQuery ----SelectIntersectExceptQuery + * ----SelectQuery -----ExpressionList + * ----SelectQuery (except) ---> ------SelectIntersectExceptQuery + * ----SelectQuery (except) -------ExpressionList + * ----SelectQuery --------SelectWithUnionQuery (select 1 union all select 2) + * --------SelectQuery (select 1) + * ------SelectQuery (select 2) + * -----SelectQuery (select 5) + **/ + + auto & union_modes = ast.list_of_modes; + + if (union_modes.empty()) + return; + + auto selects = std::move(ast.list_of_selects->children); + + if (union_modes.size() + 1 != selects.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incorrect ASTSelectWithUnionQuery (modes: {}, selects: {})", + union_modes.size(), selects.size()); + + std::reverse(selects.begin(), selects.end()); + + ASTs children = {selects.back()}; + selects.pop_back(); + ASTSelectWithUnionQuery::UnionModes modes; + + for (const auto & mode : union_modes) + { + /// Flatten all previous selects into ASTSelectIntersectQuery + if (mode == ASTSelectWithUnionQuery::Mode::EXCEPT) + { + auto 
left = std::make_shared(); + left->union_mode = ASTSelectWithUnionQuery::Mode::ALL; + + left->list_of_selects = std::make_shared(); + left->children.push_back(left->list_of_selects); + left->list_of_selects->children = std::move(children); + + left->list_of_modes = std::move(modes); + modes = {}; + + auto right = selects.back(); + selects.pop_back(); + + auto list_node = std::make_shared(); + list_node->children = {left, right}; + + auto select_intersect_except = std::make_shared(); + select_intersect_except->final_operator = {ASTSelectIntersectExceptQuery::Operator::EXCEPT}; + select_intersect_except->children.emplace_back(std::move(list_node)); + select_intersect_except->list_of_selects = std::make_shared(); + select_intersect_except->list_of_selects->children.push_back(select_intersect_except->children[0]); + + children = {select_intersect_except}; + } + else if (!selects.empty()) + { + auto right = selects.back(); + selects.pop_back(); + children.emplace_back(std::move(right)); + modes.push_back(mode); + } + } + + if (!selects.empty()) + { + auto right = selects.back(); + selects.pop_back(); + children.emplace_back(std::move(right)); + } + + ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL; + ast.list_of_selects->children = std::move(children); + ast.list_of_modes = std::move(modes); +} + } diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.h b/src/Interpreters/SelectIntersectExceptQueryVisitor.h index 58f3071972f..1dd0694666d 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.h +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.h @@ -38,7 +38,7 @@ public: static void visit(ASTPtr & ast, Data &); static void visit(ASTSelectIntersectExceptQuery &, Data &); - // static void visit(ASTSelectWithUnionQuery &, Data &); + static void visit(ASTSelectWithUnionQuery &, Data &); }; /// Visit children first. 
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 839447a90f7..4131bac28d1 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -492,14 +492,14 @@ static std::tuple executeQueryImpl( } { - /// Normalize SelectWithUnionQuery - NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; - NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); + SelectIntersectExceptQueryVisitor::Data data; + SelectIntersectExceptQueryVisitor{data}.visit(ast); } { - SelectIntersectExceptQueryVisitor::Data data; - SelectIntersectExceptQueryVisitor{data}.visit(ast); + /// Normalize SelectWithUnionQuery + NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); } /// Check the limits. @@ -540,7 +540,6 @@ static std::tuple executeQueryImpl( /// reset Input callbacks if query is not INSERT SELECT context->resetInputCallbacks(); - std::cerr << "\n\nAST: " << ast->dumpTree() << std::endl; auto interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); std::shared_ptr quota; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index fa7359574f8..d19e860c16a 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -8,6 +8,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} ASTPtr ASTSelectWithUnionQuery::clone() const { @@ -28,6 +32,9 @@ ASTPtr ASTSelectWithUnionQuery::clone() const void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { + if (!list_of_selects || list_of_selects->children.size() != list_of_modes.size() + 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect ASTSelectWithUnionQuery"); + std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); auto mode_to_str = [&](auto mode) diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 0465bdac3a6..2c36bcecf6b 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -22,7 +22,8 @@ public: { Unspecified, ALL, - DISTINCT + DISTINCT, + EXCEPT }; using UnionModes = std::vector; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e22f2c7cded..e75aad8d02f 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -140,7 +140,14 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } // SELECT ... UNION SELECT ... else + { union_modes.push_back(ASTSelectWithUnionQuery::Mode::Unspecified); + } + return true; + } + else if (s_except_parser->check(pos, expected)) + { + union_modes.push_back(ASTSelectWithUnionQuery::Mode::EXCEPT); return true; } return false; @@ -1024,4 +1031,3 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } } - diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index ef6a5744603..36f39a50ab3 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -79,11 +79,17 @@ private: class ParserUnionList : public IParserBase { public: - ParserUnionList(ParserPtr && elem_parser_, ParserPtr && s_union_parser_, ParserPtr && s_all_parser_, ParserPtr && s_distinct_parser_) + ParserUnionList( + ParserPtr && elem_parser_, + ParserPtr && s_union_parser_, + ParserPtr && s_all_parser_, + ParserPtr && s_distinct_parser_, + ParserPtr && s_except_parser_) : elem_parser(std::move(elem_parser_)) , s_union_parser(std::move(s_union_parser_)) , s_all_parser(std::move(s_all_parser_)) , s_distinct_parser(std::move(s_distinct_parser_)) + , s_except_parser(std::move(s_except_parser_)) { } @@ -120,6 +126,7 @@ private: ParserPtr s_union_parser; 
ParserPtr s_all_parser; ParserPtr s_distinct_parser; + ParserPtr s_except_parser; ASTSelectWithUnionQuery::UnionModes union_modes; }; diff --git a/src/Parsers/ParserSelectIntersectExceptQuery.cpp b/src/Parsers/ParserSelectIntersectExceptQuery.cpp index b56598166c6..2b4ba9d60e2 100644 --- a/src/Parsers/ParserSelectIntersectExceptQuery.cpp +++ b/src/Parsers/ParserSelectIntersectExceptQuery.cpp @@ -14,7 +14,6 @@ namespace DB bool ParserSelectIntersectExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword intersect_keyword("INTERSECT"); - ParserKeyword except_keyword("EXCEPT"); ASTs elements; ASTSelectIntersectExceptQuery::Operators operators; @@ -32,13 +31,7 @@ bool ParserSelectIntersectExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expec auto parse_separator = [&]() -> bool { if (!intersect_keyword.ignore(pos)) - { - if (!except_keyword.ignore(pos)) - return false; - - operators.emplace_back(ASTSelectIntersectExceptQuery::Operator::EXCEPT); - return true; - } + return false; operators.emplace_back(ASTSelectIntersectExceptQuery::Operator::INTERSECT); return true; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 87e2dab1a47..8c4c183a099 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -15,7 +15,8 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & std::make_unique(), std::make_unique("UNION"), std::make_unique("ALL"), - std::make_unique("DISTINCT")); + std::make_unique("DISTINCT"), + std::make_unique("EXCEPT")); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index e61afb5ba2a..e4d04115cff 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -65,6 +65,8 @@ QueryPipelinePtr 
IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, return std::make_shared(cur_header, converting_actions); }); } + + cur_pipeline->addTransform(std::make_shared(header, cur_pipeline->getNumStreams(), 1)); } *pipeline = QueryPipeline::unitePipelines(std::move(pipelines), max_threads); diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index c3272a5d574..9a9e4e1bf58 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -5,7 +5,7 @@ select 2 intersect select 1; select 1 except select 1; select 2 except select 1; 2 -select number from numbers(5, 5) intersect select number from numbers(20); +select number from numbers(20) intersect select number from numbers(5, 5); 5 6 7 @@ -26,12 +26,15 @@ select number, number+10 from numbers(12) except select number+5, number+15 from select 1 except select 2 intersect select 1; 1 select 1 except select 2 intersect select 2; +1 select 1 intersect select 1 except select 2; 1 select 1 intersect select 1 except select 1; select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 1; 1 select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2; +1 +select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2 except select 1; select number from numbers(10) except select 5; 0 1 @@ -71,6 +74,8 @@ select count() from (select number from numbers(1000000) intersect select number 600000 select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20)); 20 +select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except 
select number from numbers(60, 20) union all select number from numbers(100, 10)); +30 select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000) except select number from numbers(300000, 200000) except select number from numbers(600000, 200000)); 200000 select 1 union all select 1 intersect select 1; @@ -78,3 +83,13 @@ select 1 union all select 1 intersect select 1; 1 select 1 union all select 1 intersect select 2; 1 +select * from (select 1 union all select 2 union all select 3 union all select 4 except select 3 union all select 5) order by 1; +1 +2 +4 +5 +select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5) order by 1; +1 +2 +3 +5 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index 722670732ac..c88951ef353 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -4,7 +4,7 @@ select 2 intersect select 1; select 1 except select 1; select 2 except select 1; -select number from numbers(5, 5) intersect select number from numbers(20); +select number from numbers(20) intersect select number from numbers(5, 5); select number from numbers(10) except select number from numbers(5); select number, number+10 from numbers(12) except select number+5, number+15 from numbers(10); @@ -14,6 +14,7 @@ select 1 intersect select 1 except select 2; select 1 intersect select 1 except select 1; select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 1; select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2; +select 1 intersect select 1 except select 2 intersect select 1 except select 3 intersect select 2 except select 1; select number from numbers(10) except select 5; select number from numbers(100) 
intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20); @@ -22,7 +23,10 @@ with (select number from numbers(10) intersect select 5) as a select a * 10; select count() from (select number from numbers(10) except select 5); select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000)); select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20)); +select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20) union all select number from numbers(100, 10)); select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000) except select number from numbers(300000, 200000) except select number from numbers(600000, 200000)); select 1 union all select 1 intersect select 1; select 1 union all select 1 intersect select 2; +select * from (select 1 union all select 2 union all select 3 union all select 4 except select 3 union all select 5) order by 1; +select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5) order by 1; From 47fb923975b4b827c1296e71a1781edfc5a3e5d3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 13 Aug 2021 15:07:44 +0300 Subject: [PATCH 16/32] Some fixes, more tests --- .../AggregateFunctionMinMaxAny.h | 12 +----------- .../InterpreterSelectIntersectExceptQuery.cpp | 1 + .../NormalizeSelectWithUnionQueryVisitor.cpp | 3 --- .../SelectIntersectExceptQueryVisitor.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 3 --- src/Parsers/ExpressionListParsers.cpp | 10 +++++----- .../QueryPlan/IntersectOrExceptStep.cpp | 5 +++++ .../Transforms/IntersectOrExceptTransform.cpp | 5 ----- 
...02004_intersect_except_operators.reference | 6 ++++++ .../02004_intersect_except_operators.sql | 2 ++ .../02007_test_any_all_operators.reference | 19 +++++++++++++++++++ .../02007_test_any_all_operators.sql | 12 ++++++++++++ 12 files changed, 52 insertions(+), 28 deletions(-) create mode 100644 tests/queries/0_stateless/02007_test_any_all_operators.reference create mode 100644 tests/queries/0_stateless/02007_test_any_all_operators.sql diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 410f94c7afd..577b8127fd7 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -1032,17 +1032,7 @@ struct AggregateFunctionSingleValueOrNullData : Data #if USE_EMBEDDED_COMPILER - static constexpr bool is_compilable = Data::is_compilable; - - static void compileChangeIfBetter(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, llvm::Value * value_to_check) - { - Data::compileChangeFirstTime(builder, aggregate_data_ptr, value_to_check); - } - - static void compileChangeIfBetterMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) - { - Data::compileChangeFirstTimeMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr); - } + static constexpr bool is_compilable = false; #endif }; diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 9e24dd6e6a0..34d7ae5b37f 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH; + extern const int LOGICAL_ERROR; } static Block getCommonHeader(const Blocks & headers) diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp 
b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index bbe1a4e048c..0990667b2a8 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -36,9 +36,6 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, ASTs selects; auto & select_list = ast.list_of_selects->children; - if (select_list.size() < 2) - return; - int i; for (i = union_modes.size() - 1; i >= 0; --i) { diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp index 5926d5f6f10..273bc327dc3 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -162,7 +162,7 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Dat children.emplace_back(std::move(right)); } - ast.union_mode = ASTSelectWithUnionQuery::Mode::ALL; + ast.union_mode = ASTSelectWithUnionQuery::Mode::Unspecified; ast.list_of_selects->children = std::move(children); ast.list_of_modes = std::move(modes); } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index d19e860c16a..b882c738c9a 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -32,9 +32,6 @@ ASTPtr ASTSelectWithUnionQuery::clone() const void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!list_of_selects || list_of_selects->children.size() != list_of_modes.size() + 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect ASTSelectWithUnionQuery"); - std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); auto mode_to_str = [&](auto mode) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index e75aad8d02f..33085379abb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -186,13 +186,13 @@ enum class SubqueryFunctionType ALL }; -static bool modifyAST(String operator_name, std::shared_ptr & function, SubqueryFunctionType type) +static bool modifyAST(const String & operator_name, ASTPtr function, SubqueryFunctionType type) { // = ANY --> IN, != ALL --> NOT IN - if ((operator_name == "equals" && type == SubqueryFunctionType::ANY) - || (operator_name == "notEquals" && type == SubqueryFunctionType::ALL)) + if ((type == SubqueryFunctionType::ANY && operator_name == "equals") + || (type == SubqueryFunctionType::ALL && operator_name == "notEquals")) { - function->name = "in"; + assert_cast(function.get())->name = "in"; if (operator_name == "notEquals") { auto function_not = std::make_shared(); @@ -257,7 +257,7 @@ static bool modifyAST(String operator_name, std::shared_ptr & funct if (operator_name == "equals" || operator_name == "notEquals") { aggregate_function->name = "singleValueOrNull"; - function->name = "in"; + assert_cast(function.get())->name = "in"; if (operator_name == "notEquals") { auto function_not = std::make_shared(); diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index e4d04115cff..b75898b815b 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -12,6 +12,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static Block checkHeaders(const DataStreams & input_streams_) { if (input_streams_.empty()) diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index b16032bde8e..abfd1a7f0ad 100644 --- 
a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -4,11 +4,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int SET_SIZE_LIMIT_EXCEEDED; -} - IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, Operator operator_) : IProcessor(InputPorts(2, header_), {header_}) , current_operator(operator_) diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index 9a9e4e1bf58..a097bd0076f 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -66,6 +66,8 @@ select number from numbers(100) intersect select number from numbers(20, 60) exc 57 58 59 +select * from (select 1 intersect select 1); +1 with (select number from numbers(10) intersect select 5) as a select a * 10; 50 select count() from (select number from numbers(10) except select 5); @@ -93,3 +95,7 @@ select * from (select 1 union all select 2 union all select 3 union all select 4 2 3 5 +select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5 except select 1) order by 1; +2 +3 +5 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index c88951ef353..4602dec7238 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -19,6 +19,7 @@ select 1 intersect select 1 except select 2 intersect select 1 except select 3 i select number from numbers(10) except select 5; select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20); +select * from (select 1 intersect select 1); with (select 
number from numbers(10) intersect select 5) as a select a * 10; select count() from (select number from numbers(10) except select 5); select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000)); @@ -30,3 +31,4 @@ select 1 union all select 1 intersect select 1; select 1 union all select 1 intersect select 2; select * from (select 1 union all select 2 union all select 3 union all select 4 except select 3 union all select 5) order by 1; select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5) order by 1; +select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5 except select 1) order by 1; diff --git a/tests/queries/0_stateless/02007_test_any_all_operators.reference b/tests/queries/0_stateless/02007_test_any_all_operators.reference new file mode 100644 index 00000000000..cd36102cb80 --- /dev/null +++ b/tests/queries/0_stateless/02007_test_any_all_operators.reference @@ -0,0 +1,19 @@ +-- { echo } +select 1 == any (select number from numbers(10)); +1 +select 1 == any (select number from numbers(2, 10)); +0 +select 1 == all (select 1 from numbers(10)); +1 +select 1 == all (select number from numbers(10)); +0 +select number as a from numbers(10) where a == any (select number from numbers(3, 3)); +3 +4 +5 +-- TODO: Incorrect: +select 1 != any (select 1 from numbers(10)); +1 +select 1 != all (select 1 from numbers(10)); +1 +select number as a from numbers(10) where a != any (select number from numbers(3, 3)); diff --git a/tests/queries/0_stateless/02007_test_any_all_operators.sql b/tests/queries/0_stateless/02007_test_any_all_operators.sql new file mode 100644 index 00000000000..08fc929bab9 --- /dev/null +++ b/tests/queries/0_stateless/02007_test_any_all_operators.sql @@ -0,0 +1,12 @@ +-- { echo } +select 1 == any (select number from numbers(10)); +select 1 == any (select number from numbers(2, 10)); 
+select 1 == all (select 1 from numbers(10)); +select 1 == all (select number from numbers(10)); +select number as a from numbers(10) where a == any (select number from numbers(3, 3)); + +-- TODO: Incorrect: +select 1 != any (select 1 from numbers(10)); +select 1 != all (select 1 from numbers(10)); +select number as a from numbers(10) where a != any (select number from numbers(3, 3)); + From c534363abe05b348e0e1a3100d9af80f6f5a0088 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 13 Aug 2021 20:33:10 +0300 Subject: [PATCH 17/32] fix intersecting parts --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index ea5f7cfc36a..ef276a53df2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1012,8 +1012,24 @@ bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & log_ bool ReplicatedMergeTreeQueue::addFuturePartIfNotCoveredByThem(const String & part_name, LogEntry & entry, String & reject_reason) { + /// We have found `part_name` on some replica and are going to fetch it instead of covered `entry->new_part_name`. std::lock_guard lock(state_mutex); + if (virtual_parts.getContainingPart(part_name).empty()) + { + /// We should not fetch any parts that absent in our `virtual_parts` set, + /// because we do not know about such parts according to our replication queue (we know about them from some side-channel). + /// Otherwise, it may break invariants in replication queue reordering, for example: + /// 1. Our queue contains GET_PART all_2_2_0, log contains DROP_RANGE all_2_2_0 and MERGE_PARTS all_1_3_1 + /// 2. We execute GET_PART all_2_2_0, but fetch all_1_3_1 instead + /// (drop_ranges.isAffectedByDropRange(...) is false-negative, because DROP_RANGE all_2_2_0 is not pulled yet). 
+ /// It actually means, that MERGE_PARTS all_1_3_1 is executed too, but it's not even pulled yet. + /// 3. Then we pull log, trying to execute DROP_RANGE all_2_2_0 + /// and reveal that it was incorrectly reordered with MERGE_PARTS all_1_3_1 (drop range intersects merged part). + reject_reason = fmt::format("Log entry for part {} or covering part is not pulled from log to queue yet.", part_name); + return false; + } + /// FIXME get rid of actual_part_name. /// If new covering part jumps over DROP_RANGE we should execute drop range first if (drop_ranges.isAffectedByDropRange(part_name, reject_reason)) From 382fd7d4aca4a5220c9a227beffc6a806bcea031 Mon Sep 17 00:00:00 2001 From: Alexey Date: Thu, 5 Aug 2021 19:55:19 +0000 Subject: [PATCH 18/32] translated parts in MergeTree --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +- docs/ru/engines/table-engines/mergetree-family/mergetree.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 0c900454cd0..1b1313e625c 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -79,7 +79,7 @@ For a description of parameters, see the [CREATE query description](../../../sql - `SAMPLE BY` — An expression for sampling. Optional. - If a sampling expression is used, the primary key must contain it. The result of sampling expression must be unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. + If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. 
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index db6eb8154ba..61ed34b686c 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -68,7 +68,7 @@ ORDER BY expr - `SAMPLE BY` — выражение для сэмплирования. Необязательный параметр. - Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. + Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Результат выражения для сэмплирования должен быть беззнаковым целым числом. Пример: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. - `TTL` — список правил, определяющих длительности хранения строк, а также задающих правила перемещения частей на определённые тома или диски. Необязательный параметр. 
From 18ab53488fab117b8c08349b22c914732b9d69eb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 14 Aug 2021 15:31:55 +0300 Subject: [PATCH 19/32] Better --- .../InterpreterSelectIntersectExceptQuery.cpp | 2 +- .../SelectIntersectExceptQueryVisitor.cpp | 163 +++++++----------- .../SelectIntersectExceptQueryVisitor.h | 18 +- src/Parsers/ASTSelectIntersectExceptQuery.cpp | 42 +---- src/Parsers/ASTSelectIntersectExceptQuery.h | 5 - src/Parsers/ASTSelectWithUnionQuery.h | 3 +- src/Parsers/ExpressionListParsers.cpp | 22 ++- src/Parsers/ExpressionListParsers.h | 19 -- src/Parsers/ParserQueryWithOutput.cpp | 1 - .../ParserSelectIntersectExceptQuery.cpp | 62 ------- .../ParserSelectIntersectExceptQuery.h | 14 -- src/Parsers/ParserSelectWithUnionQuery.cpp | 8 +- src/Parsers/ParserUnionQueryElement.cpp | 2 - ...02004_intersect_except_operators.reference | 23 +++ .../02004_intersect_except_operators.sql | 7 + 15 files changed, 117 insertions(+), 274 deletions(-) delete mode 100644 src/Parsers/ParserSelectIntersectExceptQuery.cpp delete mode 100644 src/Parsers/ParserSelectIntersectExceptQuery.h diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 34d7ae5b37f..4edd13d08e5 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -56,7 +56,7 @@ InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery( ASTSelectIntersectExceptQuery * ast = query_ptr->as(); final_operator = ast->final_operator; - const auto & children = ast->children[0]->children; + const auto & children = ast->children; size_t num_children = children.size(); /// AST must have been changed by the visitor. 
diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp index 273bc327dc3..3815fb4ad0f 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -18,87 +18,12 @@ namespace DB void SelectIntersectExceptQueryMatcher::visit(ASTPtr & ast, Data & data) { - if (auto * select_intersect_except = ast->as()) - { - if (select_intersect_except->final_operator != ASTSelectIntersectExceptQuery::Operator::UNKNOWN) - return; - - data.initialize(select_intersect_except); - visit(*select_intersect_except, data); - } - else if (auto * select_union = ast->as()) - { + if (auto * select_union = ast->as()) visit(*select_union, data); - } -} - -void SelectIntersectExceptQueryMatcher::visit(ASTSelectIntersectExceptQuery & ast, Data & data) -{ - /* Example: select 1 intersect select 1 intsect select 1 intersect select 1 intersect select 1; - * - * --SelectIntersectExceptQuery --SelectIntersectExceptQuery - * ---ExpressionList ---ExpressionList - * ----SelectQuery ----SelectIntersectExceptQuery - * ----SelectQuery ------ExpressionList - * ----SelectQuery ---> -------SelectIntersectExceptQuery - * ----SelectQuery --------ExpressionList - * ---------SelectQuery - * ---------SelectQuery - * -------SelectQuery - * ----SelectQuery - **/ - - auto & selects = data.reversed_list_of_selects; - - if (selects.empty()) - return; - - const auto left = selects.back(); - selects.pop_back(); - const auto right = selects.back(); - selects.pop_back(); - - auto & operators = data.reversed_list_of_operators; - const auto current_operator = operators.back(); - operators.pop_back(); - - auto list_node = std::make_shared(); - list_node->children = {left, right}; - - if (selects.empty()) - { - ast.final_operator = current_operator; - ast.children = {std::move(list_node)}; - } - else - { - auto select_intersect_except = std::make_shared(); - 
select_intersect_except->final_operator = {current_operator}; - select_intersect_except->list_of_selects = std::move(list_node); - select_intersect_except->children.push_back(select_intersect_except->list_of_selects); - - selects.emplace_back(std::move(select_intersect_except)); - } - - visit(ast, data); } void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Data &) { - /* Example: select 1 union all select 2 except select 1 except select 2 union distinct select 5; - * - * --SelectWithUnionQuery --SelectIntersectExceptQuery - * ---ExpressionList ---ExpressionList - * ----SelectQuery ----SelectIntersectExceptQuery - * ----SelectQuery -----ExpressionList - * ----SelectQuery (except) ---> ------SelectIntersectExceptQuery - * ----SelectQuery (except) -------ExpressionList - * ----SelectQuery --------SelectWithUnionQuery (select 1 union all select 2) - * --------SelectQuery (select 1) - * ------SelectQuery (select 2) - * -----SelectQuery (select 5) - **/ - auto & union_modes = ast.list_of_modes; if (union_modes.empty()) @@ -107,8 +32,7 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Dat auto selects = std::move(ast.list_of_selects->children); if (union_modes.size() + 1 != selects.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect ASTSelectWithUnionQuery (modes: {}, selects: {})", + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect ASTSelectWithUnionQuery (modes: {}, selects: {})", union_modes.size(), selects.size()); std::reverse(selects.begin(), selects.end()); @@ -119,39 +43,70 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Dat for (const auto & mode : union_modes) { - /// Flatten all previous selects into ASTSelectIntersectQuery - if (mode == ASTSelectWithUnionQuery::Mode::EXCEPT) + switch (mode) { - auto left = std::make_shared(); - left->union_mode = ASTSelectWithUnionQuery::Mode::ALL; + case ASTSelectWithUnionQuery::Mode::EXCEPT: + { + auto left = 
std::make_shared(); + left->union_mode = ASTSelectWithUnionQuery::Mode::ALL; - left->list_of_selects = std::make_shared(); - left->children.push_back(left->list_of_selects); - left->list_of_selects->children = std::move(children); + left->list_of_selects = std::make_shared(); + left->children.push_back(left->list_of_selects); + left->list_of_selects->children = std::move(children); - left->list_of_modes = std::move(modes); - modes = {}; + left->list_of_modes = std::move(modes); + modes = {}; - auto right = selects.back(); - selects.pop_back(); + auto right = selects.back(); + selects.pop_back(); - auto list_node = std::make_shared(); - list_node->children = {left, right}; + auto except_node = std::make_shared(); + except_node->final_operator = ASTSelectIntersectExceptQuery::Operator::EXCEPT; + except_node->children = {left, right}; - auto select_intersect_except = std::make_shared(); - select_intersect_except->final_operator = {ASTSelectIntersectExceptQuery::Operator::EXCEPT}; - select_intersect_except->children.emplace_back(std::move(list_node)); - select_intersect_except->list_of_selects = std::make_shared(); - select_intersect_except->list_of_selects->children.push_back(select_intersect_except->children[0]); + children = {except_node}; + break; + } + case ASTSelectWithUnionQuery::Mode::INTERSECT: + { + bool from_except = false; + const auto * except_ast = typeid_cast(children.back().get()); + if (except_ast && (except_ast->final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT)) + from_except = true; - children = {select_intersect_except}; - } - else if (!selects.empty()) - { - auto right = selects.back(); - selects.pop_back(); - children.emplace_back(std::move(right)); - modes.push_back(mode); + ASTPtr left; + if (from_except) + { + left = std::move(children.back()->children[1]); + } + else + { + left = children.back(); + children.pop_back(); + } + + auto right = selects.back(); + selects.pop_back(); + + auto intersect_node = std::make_shared(); + 
intersect_node->final_operator = ASTSelectIntersectExceptQuery::Operator::INTERSECT; + intersect_node->children = {left, right}; + + if (from_except) + children.back()->children[1] = std::move(intersect_node); + else + children.push_back(std::move(intersect_node)); + + break; + } + default: + { + auto right = selects.back(); + selects.pop_back(); + children.emplace_back(std::move(right)); + modes.push_back(mode); + break; + } } } diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.h b/src/Interpreters/SelectIntersectExceptQueryVisitor.h index 1dd0694666d..07a6ad606a1 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.h +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.h @@ -17,27 +17,11 @@ class ASTFunction; class SelectIntersectExceptQueryMatcher { public: - struct Data - { - Data() = default; - - void initialize(const ASTSelectIntersectExceptQuery * select_intersect_except) - { - reversed_list_of_selects = select_intersect_except->list_of_selects->clone()->children; - reversed_list_of_operators = select_intersect_except->list_of_operators; - - std::reverse(reversed_list_of_selects.begin(), reversed_list_of_selects.end()); - std::reverse(reversed_list_of_operators.begin(), reversed_list_of_operators.end()); - } - - ASTs reversed_list_of_selects; - ASTSelectIntersectExceptQuery::Operators reversed_list_of_operators; - }; + struct Data {}; static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } static void visit(ASTPtr & ast, Data &); - static void visit(ASTSelectIntersectExceptQuery &, Data &); static void visit(ASTSelectWithUnionQuery &, Data &); }; diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.cpp b/src/Parsers/ASTSelectIntersectExceptQuery.cpp index 26fd9353d5b..9d7a717fa6c 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.cpp +++ b/src/Parsers/ASTSelectIntersectExceptQuery.cpp @@ -14,10 +14,6 @@ ASTPtr ASTSelectIntersectExceptQuery::clone() const for (const auto & child : children) 
res->children.push_back(child->clone()); - if (res->list_of_selects) - res->list_of_selects = list_of_selects->clone(); - - res->list_of_operators = list_of_operators; res->final_operator = final_operator; cloneOutputOptions(*res); @@ -28,44 +24,18 @@ void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & setti { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - auto operator_to_str = [&](auto current_operator) + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) { - if (current_operator == Operator::INTERSECT) - return "INTERSECT"; - else - return "EXCEPT"; - }; - - for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it) - { - if (it != list_of_selects->children.begin()) + if (it != children.begin()) { settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") - << operator_to_str(list_of_operators[it - list_of_selects->children.begin() - 1]) + << (final_operator == Operator::INTERSECT ? "INTERSECT" : "EXCEPT") << (settings.hilite ? 
hilite_none : ""); } - if (auto * node = (*it)->as()) - { - settings.ostr << settings.nl_or_ws << indent_str; - - if (node->list_of_selects->children.size() == 1) - { - (node->list_of_selects->children.at(0))->formatImpl(settings, state, frame); - } - else - { - auto sub_query = std::make_shared(); - sub_query->children.push_back(*it); - sub_query->formatImpl(settings, state, frame); - } - } - else - { - if (it != list_of_selects->children.begin()) - settings.ostr << settings.nl_or_ws; - (*it)->formatImpl(settings, state, frame); - } + if (it != children.begin()) + settings.ostr << settings.nl_or_ws; + (*it)->formatImpl(settings, state, frame); } } diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.h b/src/Parsers/ASTSelectIntersectExceptQuery.h index 8fc5756e370..97a8296ce2c 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.h +++ b/src/Parsers/ASTSelectIntersectExceptQuery.h @@ -24,11 +24,6 @@ public: EXCEPT }; - using Operators = std::vector; - - ASTPtr list_of_selects; - Operators list_of_operators; - /// Final operator after applying visitor. 
Operator final_operator = Operator::UNKNOWN; }; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 2c36bcecf6b..629e9b5d96d 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -23,7 +23,8 @@ public: Unspecified, ALL, DISTINCT, - EXCEPT + EXCEPT, + INTERSECT }; using UnionModes = std::vector; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 33085379abb..ef54c627aab 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -111,12 +112,18 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + ParserUnionQueryElement elem_parser; + ParserKeyword s_union_parser("UNION"); + ParserKeyword s_all_parser("ALL"); + ParserKeyword s_distinct_parser("DISTINCT"); + ParserKeyword s_except_parser("EXCEPT"); + ParserKeyword s_intersect_parser("INTERSECT"); ASTs elements; auto parse_element = [&] { ASTPtr element; - if (!elem_parser->parse(pos, element, expected)) + if (!elem_parser.parse(pos, element, expected)) return false; elements.push_back(element); @@ -126,15 +133,15 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Parse UNION type auto parse_separator = [&] { - if (s_union_parser->ignore(pos, expected)) + if (s_union_parser.ignore(pos, expected)) { // SELECT ... UNION ALL SELECT ... - if (s_all_parser->check(pos, expected)) + if (s_all_parser.check(pos, expected)) { union_modes.push_back(ASTSelectWithUnionQuery::Mode::ALL); } // SELECT ... UNION DISTINCT SELECT ... 
- else if (s_distinct_parser->check(pos, expected)) + else if (s_distinct_parser.check(pos, expected)) { union_modes.push_back(ASTSelectWithUnionQuery::Mode::DISTINCT); } @@ -145,11 +152,16 @@ bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } return true; } - else if (s_except_parser->check(pos, expected)) + else if (s_except_parser.check(pos, expected)) { union_modes.push_back(ASTSelectWithUnionQuery::Mode::EXCEPT); return true; } + else if (s_intersect_parser.check(pos, expected)) + { + union_modes.push_back(ASTSelectWithUnionQuery::Mode::INTERSECT); + return true; + } return false; }; diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 36f39a50ab3..e44cacb313f 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -79,20 +79,6 @@ private: class ParserUnionList : public IParserBase { public: - ParserUnionList( - ParserPtr && elem_parser_, - ParserPtr && s_union_parser_, - ParserPtr && s_all_parser_, - ParserPtr && s_distinct_parser_, - ParserPtr && s_except_parser_) - : elem_parser(std::move(elem_parser_)) - , s_union_parser(std::move(s_union_parser_)) - , s_all_parser(std::move(s_all_parser_)) - , s_distinct_parser(std::move(s_distinct_parser_)) - , s_except_parser(std::move(s_except_parser_)) - { - } - template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -122,11 +108,6 @@ protected: const char * getName() const override { return "list of union elements"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; private: - ParserPtr elem_parser; - ParserPtr s_union_parser; - ParserPtr s_all_parser; - ParserPtr s_distinct_parser; - ParserPtr s_except_parser; ASTSelectWithUnionQuery::UnionModes union_modes; }; diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index e2ab8a84cc1..4a73952674c 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp 
+++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Parsers/ParserSelectIntersectExceptQuery.cpp b/src/Parsers/ParserSelectIntersectExceptQuery.cpp deleted file mode 100644 index 2b4ba9d60e2..00000000000 --- a/src/Parsers/ParserSelectIntersectExceptQuery.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -bool ParserSelectIntersectExceptQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserKeyword intersect_keyword("INTERSECT"); - - ASTs elements; - ASTSelectIntersectExceptQuery::Operators operators; - - auto parse_element = [&]() -> bool - { - ASTPtr element; - if (!ParserSelectQuery().parse(pos, element, expected) && !ParserSubquery().parse(pos, element, expected)) - return false; - - elements.push_back(element); - return true; - }; - - auto parse_separator = [&]() -> bool - { - if (!intersect_keyword.ignore(pos)) - return false; - - operators.emplace_back(ASTSelectIntersectExceptQuery::Operator::INTERSECT); - return true; - }; - - if (!ParserUnionList::parseUtil(pos, parse_element, parse_separator)) - return false; - - if (operators.empty() || elements.empty()) - return false; - - if (operators.size() + 1 != elements.size()) - return false; - - auto list_node = std::make_shared(); - list_node->children = std::move(elements); - - auto intersect_or_except_ast = std::make_shared(); - - node = intersect_or_except_ast; - intersect_or_except_ast->list_of_selects = list_node; - intersect_or_except_ast->children.push_back(intersect_or_except_ast->list_of_selects); - intersect_or_except_ast->list_of_operators = operators; - - return true; -} - -} diff --git a/src/Parsers/ParserSelectIntersectExceptQuery.h b/src/Parsers/ParserSelectIntersectExceptQuery.h deleted file mode 100644 index e01785113a8..00000000000 --- a/src/Parsers/ParserSelectIntersectExceptQuery.h +++ /dev/null @@ 
-1,14 +0,0 @@ -#pragma once -#include - - -namespace DB -{ -class ParserSelectIntersectExceptQuery : public IParserBase -{ -protected: - const char * getName() const override { return "INTERSECT or EXCEPT"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -} diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 8c4c183a099..532a9e20735 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,13 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - - ParserUnionList parser( - std::make_unique(), - std::make_unique("UNION"), - std::make_unique("ALL"), - std::make_unique("DISTINCT"), - std::make_unique("EXCEPT")); + ParserUnionList parser; if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index 5abbce25930..d59a7be2278 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include @@ -12,7 +11,6 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!ParserSubquery().parse(pos, node, expected) - && !ParserSelectIntersectExceptQuery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) return false; diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index a097bd0076f..7f41faaf83a 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -99,3 +99,26 @@ select * from (select 1 union all select 2 union all select 3 union all select 4 2 3 5 +select 1 intersect (select 1 except 
select 2); +1 +select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; +explain syntax select 1 intersect select 1; +SELECT 1 +INTERSECT +SELECT 1 +explain syntax select 1 except select 1; +SELECT 1 +EXCEPT +SELECT 1 +explain syntax select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; +SELECT 1 +UNION ALL +SELECT 2 +EXCEPT +SELECT 2 +EXCEPT +SELECT 1 +UNION ALL +SELECT 1 +EXCEPT +SELECT 4 diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index 4602dec7238..ef0e52da116 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -32,3 +32,10 @@ select 1 union all select 1 intersect select 2; select * from (select 1 union all select 2 union all select 3 union all select 4 except select 3 union all select 5) order by 1; select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5) order by 1; select * from (select 1 union all select 2 union all select 3 union all select 4 intersect select 3 union all select 5 except select 1) order by 1; + +select 1 intersect (select 1 except select 2); +select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; + +explain syntax select 1 intersect select 1; +explain syntax select 1 except select 1; +explain syntax select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; From f125fb3fef9c417c82ebf52a14d4aa4aa1b8e88f Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 14 Aug 2021 20:04:21 +0300 Subject: [PATCH 20/32] Some fixes around any/all --- .../AggregateFunctionMinMaxAny.h | 12 +- .../SelectIntersectExceptQueryVisitor.cpp | 4 + src/Parsers/ASTSelectWithUnionQuery.cpp | 4 - src/Parsers/ExpressionListParsers.cpp | 124 ++++++++---------- 
.../QueryPlan/IntersectOrExceptStep.h | 2 +- .../Transforms/IntersectOrExceptTransform.h | 2 +- .../02007_test_any_all_operators.reference | 22 +++- .../02007_test_any_all_operators.sql | 13 +- 8 files changed, 101 insertions(+), 82 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 577b8127fd7..e5471b8a727 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -50,6 +50,8 @@ private: T value; public: + static constexpr bool is_nullable = false; + bool has() const { return has_value; @@ -470,6 +472,8 @@ private: char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero. public: + static constexpr bool is_nullable = false; + bool has() const { return size >= 0; @@ -693,6 +697,8 @@ private: Field value; public: + static constexpr bool is_nullable = false; + bool has() const { return !value.isNull(); @@ -979,6 +985,8 @@ struct AggregateFunctionAnyLastData : Data template struct AggregateFunctionSingleValueOrNullData : Data { + static constexpr bool is_nullable = true; + using Self = AggregateFunctionSingleValueOrNullData; bool first_value = true; @@ -1136,7 +1144,9 @@ public: DataTypePtr getReturnType() const override { auto result_type = this->argument_types.at(0); - return Data::name() == "singleValueOrNull" ? 
makeNullable(result_type) : result_type; + if constexpr (Data::is_nullable) + return makeNullable(result_type); + return result_type; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp index 3815fb4ad0f..190ec279038 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -5,6 +5,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} /* * Note: there is a difference between intersect and except behaviour. diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index b882c738c9a..fa7359574f8 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -8,10 +8,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} ASTPtr ASTSelectWithUnionQuery::clone() const { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index ef54c627aab..69d95422799 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -198,91 +198,83 @@ enum class SubqueryFunctionType ALL }; -static bool modifyAST(const String & operator_name, ASTPtr function, SubqueryFunctionType type) +static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) { - // = ANY --> IN, != ALL --> NOT IN - if ((type == SubqueryFunctionType::ANY && operator_name == "equals") - || (type == SubqueryFunctionType::ALL && operator_name == "notEquals")) + /* Rewrite in AST: + * = ANY --> IN + * != ALL --> NOT IN + * = ALL --> IN (SELECT singleValueOrNull(*) FROM subquery) + * != ANY --> NOT IN (SELECT singleValueOrNull(*) FROM subquery) + **/ + + auto * function = assert_cast(ast.get()); + String operator_name = function->name; + + auto function_equals = 
operator_name == "equals"; + auto function_not_equals = operator_name == "notEquals"; + + String aggregate_function_name; + if (function_equals || function_not_equals) { - assert_cast(function.get())->name = "in"; if (operator_name == "notEquals") + function->name = "notIn"; + else + function->name = "in"; + + if ((type == SubqueryFunctionType::ANY && function_equals) + || (type == SubqueryFunctionType::ALL && function_not_equals)) { - auto function_not = std::make_shared(); - auto exp_list_not = std::make_shared(); - exp_list_not->children.push_back(function); - function_not->name = "not"; - function_not->children.push_back(exp_list_not); - function_not->arguments = exp_list_not; - function = function_not; + return true; } - return true; + + aggregate_function_name = "singleValueOrNull"; } + else if (operator_name == "greaterOrEquals" || operator_name == "greater") + { + aggregate_function_name = (type == SubqueryFunctionType::ANY ? "min" : "max"); + } + else if (operator_name == "lessOrEquals" || operator_name == "less") + { + aggregate_function_name = (type == SubqueryFunctionType::ANY ? 
"max" : "min"); + } + else + return false; - // subquery --> (SELECT aggregate_function(*) FROM subquery) - auto aggregate_function = std::make_shared(); - auto aggregate_function_exp_list = std::make_shared(); - aggregate_function_exp_list ->children.push_back(std::make_shared()); - aggregate_function->arguments = aggregate_function_exp_list; - aggregate_function->children.push_back(aggregate_function_exp_list); + /// subquery --> (SELECT aggregate_function(*) FROM subquery) + auto aggregate_function = makeASTFunction(aggregate_function_name, std::make_shared()); + auto subquery_node = function->children[0]->children[1]; - ASTPtr subquery_node = function->children[0]->children[1]; - auto select_query = std::make_shared(); - auto tables_in_select = std::make_shared(); - auto tables_in_select_element = std::make_shared(); auto table_expression = std::make_shared(); - table_expression->subquery = subquery_node; - table_expression->children.push_back(subquery_node); - tables_in_select_element->table_expression = table_expression; - tables_in_select_element->children.push_back(table_expression); - tables_in_select->children.push_back(tables_in_select_element); + table_expression->subquery = std::move(subquery_node); + table_expression->children.push_back(table_expression->subquery); + + auto tables_in_select_element = std::make_shared(); + tables_in_select_element->table_expression = std::move(table_expression); + tables_in_select_element->children.push_back(tables_in_select_element->table_expression); + + auto tables_in_select = std::make_shared(); + tables_in_select->children.push_back(std::move(tables_in_select_element)); + auto select_exp_list = std::make_shared(); select_exp_list->children.push_back(aggregate_function); + + auto select_query = std::make_shared(); select_query->children.push_back(select_exp_list); select_query->children.push_back(tables_in_select); - select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_exp_list)); - 
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select)); + + select_query->setExpression(ASTSelectQuery::Expression::SELECT, select_exp_list); + select_query->setExpression(ASTSelectQuery::Expression::TABLES, tables_in_select); auto select_with_union_query = std::make_shared(); - auto list_of_selects = std::make_shared(); - list_of_selects->children.push_back(select_query); - select_with_union_query->list_of_selects = list_of_selects; + select_with_union_query->list_of_selects = std::make_shared(); + select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); auto new_subquery = std::make_shared(); new_subquery->children.push_back(select_with_union_query); - function->children[0]->children.pop_back(); - function->children[0]->children.push_back(new_subquery); + ast->children[0]->children.back() = std::move(new_subquery); - if (operator_name == "greaterOrEquals" || operator_name == "greater") - { - aggregate_function->name = type == SubqueryFunctionType::ANY ? "min" : "max"; - return true; - } - if (operator_name == "lessOrEquals" || operator_name == "less") - { - aggregate_function->name = type == SubqueryFunctionType::ANY ? 
"max" : "min"; - return true; - } - - // = ALL --> IN (SELECT singleValueOrNull(*) FROM subquery) - // != ANY --> NOT IN (SELECT singleValueOrNull(*) FROM subquery) - if (operator_name == "equals" || operator_name == "notEquals") - { - aggregate_function->name = "singleValueOrNull"; - assert_cast(function.get())->name = "in"; - if (operator_name == "notEquals") - { - auto function_not = std::make_shared(); - auto exp_list_not = std::make_shared(); - exp_list_not->children.push_back(function); - function_not->name = "not"; - function_not->children.push_back(exp_list_not); - function_not->arguments = exp_list_not; - function = function_not; - } - return true; - } - return false; + return true; } bool ParserComparisonExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -346,7 +338,7 @@ bool ParserComparisonExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & exp_list->children.push_back(node); exp_list->children.push_back(elem); - if (subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function->name, function, subquery_function_type)) + if (subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function, subquery_function_type)) return false; pos.increaseDepth(); diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h index 002f1b1570c..9e87c921ab2 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.h +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h @@ -12,7 +12,7 @@ using Operator = ASTSelectIntersectExceptQuery::Operator; public: /// max_threads is used to limit the number of threads for result pipeline. 
- IntersectOrExceptStep(DataStreams input_streams_, Operator operators_, size_t max_threads_ = 0); + IntersectOrExceptStep(DataStreams input_streams_, Operator operator_, size_t max_threads_ = 0); String getName() const override { return "IntersectOrExcept"; } diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.h b/src/Processors/Transforms/IntersectOrExceptTransform.h index da1fa6a119e..e200bfd6cc5 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.h +++ b/src/Processors/Transforms/IntersectOrExceptTransform.h @@ -14,7 +14,7 @@ class IntersectOrExceptTransform : public IProcessor using Operator = ASTSelectIntersectExceptQuery::Operator; public: - IntersectOrExceptTransform(const Block & header_, Operator operators); + IntersectOrExceptTransform(const Block & header_, Operator operator_); String getName() const override { return "IntersectOrExcept"; } diff --git a/tests/queries/0_stateless/02007_test_any_all_operators.reference b/tests/queries/0_stateless/02007_test_any_all_operators.reference index cd36102cb80..ebd7cd8f6ca 100644 --- a/tests/queries/0_stateless/02007_test_any_all_operators.reference +++ b/tests/queries/0_stateless/02007_test_any_all_operators.reference @@ -3,17 +3,29 @@ select 1 == any (select number from numbers(10)); 1 select 1 == any (select number from numbers(2, 10)); 0 +select 1 != all (select 1 from numbers(10)); +0 +select 1 != all (select number from numbers(10)); +0 select 1 == all (select 1 from numbers(10)); 1 select 1 == all (select number from numbers(10)); 0 +select 1 != any (select 1 from numbers(10)); +0 +select 1 != any (select number from numbers(10)); +1 select number as a from numbers(10) where a == any (select number from numbers(3, 3)); 3 4 5 --- TODO: Incorrect: -select 1 != any (select 1 from numbers(10)); +select number as a from numbers(10) where a != any (select 5 from numbers(3, 3)); +0 1 -select 1 != all (select 1 from numbers(10)); -1 -select number as a from numbers(10) where a != any 
(select number from numbers(3, 3)); +2 +3 +4 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/02007_test_any_all_operators.sql b/tests/queries/0_stateless/02007_test_any_all_operators.sql index 08fc929bab9..525f7e1fabd 100644 --- a/tests/queries/0_stateless/02007_test_any_all_operators.sql +++ b/tests/queries/0_stateless/02007_test_any_all_operators.sql @@ -1,12 +1,17 @@ -- { echo } select 1 == any (select number from numbers(10)); select 1 == any (select number from numbers(2, 10)); + +select 1 != all (select 1 from numbers(10)); +select 1 != all (select number from numbers(10)); + select 1 == all (select 1 from numbers(10)); select 1 == all (select number from numbers(10)); -select number as a from numbers(10) where a == any (select number from numbers(3, 3)); --- TODO: Incorrect: + select 1 != any (select 1 from numbers(10)); -select 1 != all (select 1 from numbers(10)); -select number as a from numbers(10) where a != any (select number from numbers(3, 3)); +select 1 != any (select number from numbers(10)); + +select number as a from numbers(10) where a == any (select number from numbers(3, 3)); +select number as a from numbers(10) where a != any (select 5 from numbers(3, 3)); From 15eb68d117d89128ee91fe7494f456e72aaa5479 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Aug 2021 09:33:08 +0300 Subject: [PATCH 21/32] Fix bad cast in arrayIndex #26330 --- src/Functions/array/arrayIndex.h | 7 +++++++ .../0_stateless/02010_array_index_bad_cast.reference | 0 tests/queries/0_stateless/02010_array_index_bad_cast.sql | 2 ++ 3 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/02010_array_index_bad_cast.reference create mode 100644 tests/queries/0_stateless/02010_array_index_bad_cast.sql diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index a390abc4eaf..d7bbcaf8d46 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -115,6 +115,13 @@ private: [[maybe_unused]] 
const NullMap * const null_map_data, [[maybe_unused]] const NullMap * const null_map_item) { + if constexpr (std::is_same_v && std::is_same_v) + { + /// Generic variant is using IColumn::compare function that only allows to compare columns of identical types. + if (typeid(data) != typeid(target)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Columns {} and {} cannot be compared", data.getName(), target.getName()); + } + const size_t size = offsets.size(); result.resize(size); diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.reference b/tests/queries/0_stateless/02010_array_index_bad_cast.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql new file mode 100644 index 00000000000..19c58bb28a7 --- /dev/null +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -0,0 +1,2 @@ +-- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError 44 } From 6ff43a614c6ab313f5d87a395a436bba52899bf2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Aug 2021 09:52:55 +0300 Subject: [PATCH 22/32] Use only SSE2 in "unbundled" build --- CMakeLists.txt | 5 +++-- docker/packager/packager | 8 +++++++- release | 3 --- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1727caea766..35c22526816 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -395,9 +395,10 @@ endif () # Turns on all external libs like s3, kafka, ODBC, ... option(ENABLE_LIBRARIES "Enable all external libraries by default" ON) -# We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your -# system. 
+# We recommend avoiding this mode for production builds because we can't guarantee +# all needed libraries exist in your system. # This mode exists for enthusiastic developers who are searching for trouble. +# The whole idea of using unknown version of libraries from the OS distribution is deeply flawed. # Useful for maintainers of OS packages. option (UNBUNDLED "Use system libraries instead of ones in contrib/" OFF) diff --git a/docker/packager/packager b/docker/packager/packager index c05c85d3e28..857df079281 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -151,8 +151,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ cmake_flags.append('-DENABLE_TESTS=1') cmake_flags.append('-DUSE_GTEST=1') + # "Unbundled" build is not suitable for any production usage. + # But it is occasionally used by some developers. + # The whole idea of using unknown version of libraries from the OS distribution is deeply flawed. + # We wish these developers good luck. if unbundled: - cmake_flags.append('-DUNBUNDLED=1 -DUSE_INTERNAL_RDKAFKA_LIBRARY=1 -DENABLE_ARROW=0 -DENABLE_AVRO=0 -DENABLE_ORC=0 -DENABLE_PARQUET=0') + # We also disable all CPU features except basic x86_64. + # It is only slightly related to "unbundled" build, but it is a good place to test if code compiled without these instruction sets. 
+ cmake_flags.append('-DUNBUNDLED=1 -DUSE_INTERNAL_RDKAFKA_LIBRARY=1 -DENABLE_ARROW=0 -DENABLE_AVRO=0 -DENABLE_ORC=0 -DENABLE_PARQUET=0 -DENABLE_SSSE3=0 -DENABLE_SSE41=0 -DENABLE_SSE42=0 -DENABLE_PCLMULQDQ=0 -DENABLE_POPCNT=0 -DENABLE_AVX=0 -DENABLE_AVX2=0') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') diff --git a/release b/release index 9484d79630a..de549595d43 100755 --- a/release +++ b/release @@ -71,9 +71,6 @@ then export DEB_CC=${DEB_CC=clang-10} export DEB_CXX=${DEB_CXX=clang++-10} EXTRAPACKAGES="$EXTRAPACKAGES clang-10 lld-10" -elif [[ $BUILD_TYPE == 'valgrind' ]]; then - MALLOC_OPTS="-DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0" - VERSION_POSTFIX+="+valgrind" elif [[ $BUILD_TYPE == 'debug' ]]; then CMAKE_BUILD_TYPE=Debug VERSION_POSTFIX+="+debug" From c06f212bb5d4e8058334dc3b34fefea4174591e5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 15 Aug 2021 09:55:54 +0300 Subject: [PATCH 23/32] Update packager --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 857df079281..95b7fcd8568 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -157,7 +157,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ # We wish these developers good luck. if unbundled: # We also disable all CPU features except basic x86_64. - # It is only slightly related to "unbundled" build, but it is a good place to test if code compiled without these instruction sets. + # It is only slightly related to "unbundled" build, but it is a good place to test if code compiles without these instruction sets. 
cmake_flags.append('-DUNBUNDLED=1 -DUSE_INTERNAL_RDKAFKA_LIBRARY=1 -DENABLE_ARROW=0 -DENABLE_AVRO=0 -DENABLE_ORC=0 -DENABLE_PARQUET=0 -DENABLE_SSSE3=0 -DENABLE_SSE41=0 -DENABLE_SSE42=0 -DENABLE_PCLMULQDQ=0 -DENABLE_POPCNT=0 -DENABLE_AVX=0 -DENABLE_AVX2=0') if split_binary: From ba0ba988bafa027b08541dd9c34286fecc53518b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Aug 2021 11:21:30 +0300 Subject: [PATCH 24/32] Fix build --- contrib/libmetrohash/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index 9304cb3644c..4ec5a58717d 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ -2,9 +2,5 @@ set (SRCS src/metrohash64.cpp src/metrohash128.cpp ) -if (HAVE_SSE42) # Not used. Pretty easy to port. - list (APPEND SRCS src/metrohash128crc.cpp) -endif () - add_library(metrohash ${SRCS}) target_include_directories(metrohash PUBLIC src) From 97d921d4da75105094e869ed7b25730946569eeb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Aug 2021 11:38:16 +0300 Subject: [PATCH 25/32] Remove trash --- src/Compression/CompressedWriteBuffer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 8d146e8de23..1c15dd77f14 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -30,10 +30,6 @@ void CompressedWriteBuffer::nextImpl() compressed_buffer.resize(compressed_reserve_size); UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); - // FIXME remove this after fixing msan report in lz4. - // Almost always reproduces on stateless tests, the exact test unknown. 
- __msan_unpoison(compressed_buffer.data(), compressed_size); - CityHash_v1_0_2::uint128 checksum = CityHash_v1_0_2::CityHash128(compressed_buffer.data(), compressed_size); out.write(reinterpret_cast(&checksum), CHECKSUM_SIZE); out.write(compressed_buffer.data(), compressed_size); From f66e8464f94b41624099d2a71151a52778ed74d8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 15 Aug 2021 09:55:43 +0300 Subject: [PATCH 26/32] Some final fixes --- .../InterpreterSelectIntersectExceptQuery.cpp | 7 +- .../InterpreterSelectIntersectExceptQuery.h | 5 +- .../SelectIntersectExceptQueryVisitor.cpp | 6 +- src/Parsers/ASTSelectIntersectExceptQuery.cpp | 5 +- src/Parsers/ExpressionListParsers.cpp | 86 +++---------------- src/Parsers/ExpressionListParsers.h | 17 +++- src/Parsers/ParserQueryWithOutput.cpp | 1 + src/Parsers/ParserUnionQueryElement.cpp | 3 +- .../QueryPlan/IntersectOrExceptStep.cpp | 6 +- .../Transforms/IntersectOrExceptTransform.cpp | 1 + ...02004_intersect_except_operators.reference | 5 ++ .../02004_intersect_except_operators.sql | 4 + .../02007_test_any_all_operators.reference | 20 +++++ .../02007_test_any_all_operators.sql | 11 ++- 14 files changed, 83 insertions(+), 94 deletions(-) diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 4edd13d08e5..9c8dda56b44 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -89,7 +89,6 @@ InterpreterSelectIntersectExceptQuery::buildCurrentChildInterpreter(const ASTPtr if (ast_ptr_->as()) return std::make_unique(ast_ptr_, context, SelectQueryOptions()); - // if (ast_ptr_->as()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected query: {}", ast_ptr_->getID()); } @@ -140,4 +139,10 @@ BlockIO InterpreterSelectIntersectExceptQuery::execute() return res; } +void InterpreterSelectIntersectExceptQuery::ignoreWithTotals() +{ + for (auto & interpreter : 
nested_interpreters) + interpreter->ignoreWithTotals(); +} + } diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.h b/src/Interpreters/InterpreterSelectIntersectExceptQuery.h index 9cbde055b0b..805565e4c51 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.h +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.h @@ -28,6 +28,8 @@ public: Block getSampleBlock() { return result_header; } + void ignoreWithTotals() override; + private: static String getName() { return "SelectIntersectExceptQuery"; } @@ -36,9 +38,8 @@ private: void buildQueryPlan(QueryPlan & query_plan) override; - void ignoreWithTotals() override {} - std::vector> nested_interpreters; + Operator final_operator; }; diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp index 190ec279038..e26c4371591 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -12,11 +12,11 @@ namespace ErrorCodes /* * Note: there is a difference between intersect and except behaviour. - * `intersect` is supposed to be a part of last SelectQuery, i.e. the sequence with no parenthesis: + * `intersect` is supposed to be a part of the last SelectQuery, i.e. 
the sequence with no parenthesis: * select 1 union all select 2 except select 1 intersect 2 except select 2 union distinct select 5; * is interpreted as: * select 1 union all select 2 except (select 1 intersect 2) except select 2 union distinct select 5; - * Whereas `except` is applied to all union part like: + * Whereas `except` is applied to all left union part like: * (((select 1 union all select 2) except (select 1 intersect 2)) except select 2) union distinct select 5; **/ @@ -28,7 +28,7 @@ void SelectIntersectExceptQueryMatcher::visit(ASTPtr & ast, Data & data) void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Data &) { - auto & union_modes = ast.list_of_modes; + const auto & union_modes = ast.list_of_modes; if (union_modes.empty()) return; diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.cpp b/src/Parsers/ASTSelectIntersectExceptQuery.cpp index 9d7a717fa6c..3b9cb0a2c16 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.cpp +++ b/src/Parsers/ASTSelectIntersectExceptQuery.cpp @@ -30,11 +30,10 @@ void ASTSelectIntersectExceptQuery::formatQueryImpl(const FormatSettings & setti { settings.ostr << settings.nl_or_ws << indent_str << (settings.hilite ? hilite_keyword : "") << (final_operator == Operator::INTERSECT ? "INTERSECT" : "EXCEPT") - << (settings.hilite ? hilite_none : ""); + << (settings.hilite ? 
hilite_none : "") + << settings.nl_or_ws; } - if (it != children.begin()) - settings.ostr << settings.nl_or_ws; (*it)->formatImpl(settings, state, frame); } } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 69d95422799..58f5e766905 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -277,79 +277,6 @@ static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) return true; } -bool ParserComparisonExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - bool first = true; - - auto current_depth = pos.depth; - while (true) - { - if (first) - { - ASTPtr elem; - if (!elem_parser.parse(pos, elem, expected)) - return false; - - node = elem; - first = false; - } - else - { - /// try to find any of the valid operators - const char ** it; - Expected stub; - for (it = overlapping_operators_to_skip; *it; ++it) - if (ParserKeyword{*it}.checkWithoutMoving(pos, stub)) - break; - - if (*it) - break; - - for (it = operators; *it; it += 2) - if (parseOperator(pos, *it, expected)) - break; - - if (!*it) - break; - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - ASTPtr elem; - SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; - if (ParserKeyword("ANY").ignore(pos, expected)) - subquery_function_type = SubqueryFunctionType::ANY; - else if (ParserKeyword("ALL").ignore(pos, expected)) - subquery_function_type = SubqueryFunctionType::ALL; - else if (!elem_parser.parse(pos, elem, expected)) - return false; - - if (subquery_function_type != SubqueryFunctionType::NONE && !ParserSubquery().parse(pos, elem, expected)) - return false; - - /// the first argument of the function is the previous element, the second is the next one - function->name = it[1]; - function->arguments = exp_list; - function->children.push_back(exp_list); - - 
exp_list->children.push_back(node); - exp_list->children.push_back(elem); - - if (subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function, subquery_function_type)) - return false; - - pos.increaseDepth(); - node = function; - } - } - - pos.depth = current_depth; - return true; -} - bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { bool first = true; @@ -393,7 +320,15 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node auto exp_list = std::make_shared(); ASTPtr elem; - if (!(remaining_elem_parser ? remaining_elem_parser : first_elem_parser)->parse(pos, elem, expected)) + SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; + if (allow_any_all_operators && ParserKeyword("ANY").ignore(pos, expected)) + subquery_function_type = SubqueryFunctionType::ANY; + else if (allow_any_all_operators && ParserKeyword("ALL").ignore(pos, expected)) + subquery_function_type = SubqueryFunctionType::ALL; + else if (!(remaining_elem_parser ? 
remaining_elem_parser : first_elem_parser)->parse(pos, elem, expected)) + return false; + + if (subquery_function_type != SubqueryFunctionType::NONE && !ParserSubquery().parse(pos, elem, expected)) return false; /// the first argument of the function is the previous element, the second is the next one @@ -404,6 +339,9 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node exp_list->children.push_back(node); exp_list->children.push_back(elem); + if (allow_any_all_operators && subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function, subquery_function_type)) + return false; + /** special exception for the access operator to the element of the array `x[y]`, which * contains the infix part '[' and the suffix ''] '(specified as' [') */ diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index e44cacb313f..17deec4e9e4 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -121,6 +121,8 @@ private: Operators_t overlapping_operators_to_skip = { (const char *[]){ nullptr } }; ParserPtr first_elem_parser; ParserPtr remaining_elem_parser; + /// =, !=, <, > ALL (subquery) / ANY (subquery) + bool allow_any_all_operators = false; public: /** `operators_` - allowed operators and their corresponding functions @@ -130,8 +132,10 @@ public: { } - ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, Operators_t overlapping_operators_to_skip_, ParserPtr && first_elem_parser_) - : operators(operators_), overlapping_operators_to_skip(overlapping_operators_to_skip_), first_elem_parser(std::move(first_elem_parser_)) + ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, + Operators_t overlapping_operators_to_skip_, ParserPtr && first_elem_parser_, bool allow_any_all_operators_ = false) + : operators(operators_), overlapping_operators_to_skip(overlapping_operators_to_skip_), + first_elem_parser(std::move(first_elem_parser_)), 
allow_any_all_operators(allow_any_all_operators_) { } @@ -341,12 +345,16 @@ class ParserComparisonExpression : public IParserBase private: static const char * operators[]; static const char * overlapping_operators_to_skip[]; - ParserBetweenExpression elem_parser; + ParserLeftAssociativeBinaryOperatorList operator_parser {operators, + overlapping_operators_to_skip, std::make_unique(), true}; protected: const char * getName() const override{ return "comparison expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + return operator_parser.parse(pos, node, expected); + } }; /** Parser for nullity checking with IS (NOT) NULL. @@ -355,6 +363,7 @@ class ParserNullityChecking : public IParserBase { private: ParserComparisonExpression elem_parser; + protected: const char * getName() const override { return "nullity checking"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 4a73952674c..82f9f561187 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -24,6 +24,7 @@ #include #include + namespace DB { diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index d59a7be2278..efd022e6362 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,8 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) - && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp 
b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index b75898b815b..d1bb1eb41e9 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -36,10 +36,7 @@ IntersectOrExceptStep::IntersectOrExceptStep( , max_threads(max_threads_) { input_streams = std::move(input_streams_); - if (input_streams.size() == 1) - output_stream = input_streams.front(); - else - output_stream = DataStream{.header = header}; + output_stream = DataStream{.header = header}; } QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, const BuildQueryPipelineSettings &) @@ -71,6 +68,7 @@ QueryPipelinePtr IntersectOrExceptStep::updatePipeline(QueryPipelines pipelines, }); } + /// For the case of union. cur_pipeline->addTransform(std::make_shared(header, cur_pipeline->getNumStreams(), 1)); } diff --git a/src/Processors/Transforms/IntersectOrExceptTransform.cpp b/src/Processors/Transforms/IntersectOrExceptTransform.cpp index abfd1a7f0ad..3e39123ae4b 100644 --- a/src/Processors/Transforms/IntersectOrExceptTransform.cpp +++ b/src/Processors/Transforms/IntersectOrExceptTransform.cpp @@ -4,6 +4,7 @@ namespace DB { +/// After visitor is applied, ASTSelectIntersectExcept always has two child nodes. 
IntersectOrExceptTransform::IntersectOrExceptTransform(const Block & header_, Operator operator_) : IProcessor(InputPorts(2, header_), {header_}) , current_operator(operator_) diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.reference b/tests/queries/0_stateless/02004_intersect_except_operators.reference index 7f41faaf83a..03b881f690b 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.reference +++ b/tests/queries/0_stateless/02004_intersect_except_operators.reference @@ -70,6 +70,10 @@ select * from (select 1 intersect select 1); 1 with (select number from numbers(10) intersect select 5) as a select a * 10; 50 +with (select 5 except select 1) as a select a except select 5; +with (select number from numbers(10) intersect select 5) as a select a intersect select 1; +with (select number from numbers(10) intersect select 5) as a select a except select 1; +5 select count() from (select number from numbers(10) except select 5); 9 select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000)); @@ -102,6 +106,7 @@ select * from (select 1 union all select 2 union all select 3 union all select 4 select 1 intersect (select 1 except select 2); 1 select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; +select 1 intersect select count() from (select 1 except select 2 intersect select 2 union all select 1); explain syntax select 1 intersect select 1; SELECT 1 INTERSECT diff --git a/tests/queries/0_stateless/02004_intersect_except_operators.sql b/tests/queries/0_stateless/02004_intersect_except_operators.sql index ef0e52da116..7f08cc0adf2 100644 --- a/tests/queries/0_stateless/02004_intersect_except_operators.sql +++ b/tests/queries/0_stateless/02004_intersect_except_operators.sql @@ -21,6 +21,9 @@ select number from numbers(100) intersect select number from numbers(20, 60) exc select * from (select 1 intersect select 1); with (select number from 
numbers(10) intersect select 5) as a select a * 10; +with (select 5 except select 1) as a select a except select 5; +with (select number from numbers(10) intersect select 5) as a select a intersect select 1; +with (select number from numbers(10) intersect select 5) as a select a except select 1; select count() from (select number from numbers(10) except select 5); select count() from (select number from numbers(1000000) intersect select number from numbers(200000, 600000)); select count() from (select number from numbers(100) intersect select number from numbers(20, 60) except select number from numbers(30, 20) except select number from numbers(60, 20)); @@ -35,6 +38,7 @@ select * from (select 1 union all select 2 union all select 3 union all select 4 select 1 intersect (select 1 except select 2); select 1 union all select 2 except (select 2 except select 1 union all select 1) except select 4; +select 1 intersect select count() from (select 1 except select 2 intersect select 2 union all select 1); explain syntax select 1 intersect select 1; explain syntax select 1 except select 1; diff --git a/tests/queries/0_stateless/02007_test_any_all_operators.reference b/tests/queries/0_stateless/02007_test_any_all_operators.reference index ebd7cd8f6ca..a232320d15c 100644 --- a/tests/queries/0_stateless/02007_test_any_all_operators.reference +++ b/tests/queries/0_stateless/02007_test_any_all_operators.reference @@ -29,3 +29,23 @@ select number as a from numbers(10) where a != any (select 5 from numbers(3, 3)) 7 8 9 +select 1 < any (select 1 from numbers(10)); +0 +select 1 <= any (select 1 from numbers(10)); +1 +select 1 < any (select number from numbers(10)); +1 +select 1 > any (select number from numbers(10)); +1 +select 1 >= any (select number from numbers(10)); +1 +select 11 > all (select number from numbers(10)); +1 +select 11 <= all (select number from numbers(11)); +0 +select 11 < all (select 11 from numbers(10)); +0 +select 11 > all (select 11 from numbers(10)); +0 
+select 11 >= all (select 11 from numbers(10)); +1 diff --git a/tests/queries/0_stateless/02007_test_any_all_operators.sql b/tests/queries/0_stateless/02007_test_any_all_operators.sql index 525f7e1fabd..10d7325afca 100644 --- a/tests/queries/0_stateless/02007_test_any_all_operators.sql +++ b/tests/queries/0_stateless/02007_test_any_all_operators.sql @@ -8,10 +8,19 @@ select 1 != all (select number from numbers(10)); select 1 == all (select 1 from numbers(10)); select 1 == all (select number from numbers(10)); - select 1 != any (select 1 from numbers(10)); select 1 != any (select number from numbers(10)); select number as a from numbers(10) where a == any (select number from numbers(3, 3)); select number as a from numbers(10) where a != any (select 5 from numbers(3, 3)); +select 1 < any (select 1 from numbers(10)); +select 1 <= any (select 1 from numbers(10)); +select 1 < any (select number from numbers(10)); +select 1 > any (select number from numbers(10)); +select 1 >= any (select number from numbers(10)); +select 11 > all (select number from numbers(10)); +select 11 <= all (select number from numbers(11)); +select 11 < all (select 11 from numbers(10)); +select 11 > all (select 11 from numbers(10)); +select 11 >= all (select 11 from numbers(10)); From 9ae92798d109e58e95c3a50d3e50227624d98b9a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Aug 2021 11:44:12 +0300 Subject: [PATCH 27/32] Fix stress test in ~CompressedWriteBuffer --- src/Compression/CompressedWriteBuffer.cpp | 7 +++++++ src/Compression/CompressedWriteBuffer.h | 1 + 2 files changed, 8 insertions(+) diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 1c15dd77f14..7454e09fda0 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -36,6 +36,12 @@ void CompressedWriteBuffer::nextImpl() } +void CompressedWriteBuffer::finalize() +{ + next(); +} + + 
CompressedWriteBuffer::CompressedWriteBuffer( WriteBuffer & out_, CompressionCodecPtr codec_, @@ -44,6 +50,7 @@ CompressedWriteBuffer::CompressedWriteBuffer( { } + CompressedWriteBuffer::~CompressedWriteBuffer() { /// FIXME move final flush into the caller diff --git a/src/Compression/CompressedWriteBuffer.h b/src/Compression/CompressedWriteBuffer.h index a9612b463a5..2268b7bec50 100644 --- a/src/Compression/CompressedWriteBuffer.h +++ b/src/Compression/CompressedWriteBuffer.h @@ -22,6 +22,7 @@ private: PODArray compressed_buffer; void nextImpl() override; + void finalize() override; public: CompressedWriteBuffer( From e28c9c0ba7cea51ffe5b2d1c41d13bc0832270d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 15 Aug 2021 11:54:28 +0300 Subject: [PATCH 28/32] Mark tests for DatabaseReplicated as green --- docker/test/stateless/process_functional_tests_result.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/test/stateless/process_functional_tests_result.py b/docker/test/stateless/process_functional_tests_result.py index e60424ad4d1..a42b0e68d88 100755 --- a/docker/test/stateless/process_functional_tests_result.py +++ b/docker/test/stateless/process_functional_tests_result.py @@ -105,6 +105,10 @@ def process_result(result_path): description += ", skipped: {}".format(skipped) if unknown != 0: description += ", unknown: {}".format(unknown) + + # Temporary green for tests with DatabaseReplicated: + if 1 == int(os.environ.get('USE_DATABASE_REPLICATED', 0)): + state = "success" else: state = "failure" description = "Output log doesn't exist" From 86694a2bbbb049f74e80608527a39fc1fde9aa22 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 15 Aug 2021 12:04:38 +0300 Subject: [PATCH 29/32] Update CompressedWriteBuffer.h --- src/Compression/CompressedWriteBuffer.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Compression/CompressedWriteBuffer.h b/src/Compression/CompressedWriteBuffer.h index 2268b7bec50..57ba679855e 
100644 --- a/src/Compression/CompressedWriteBuffer.h +++ b/src/Compression/CompressedWriteBuffer.h @@ -22,7 +22,6 @@ private: PODArray compressed_buffer; void nextImpl() override; - void finalize() override; public: CompressedWriteBuffer( @@ -30,6 +29,8 @@ public: CompressionCodecPtr codec_ = CompressionCodecFactory::instance().getDefaultCodec(), size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + void finalize() override; + /// The amount of compressed data size_t getCompressedBytes() { From 5f5470c2cdefdf57e6de9bd49b7312f7bc2a3e50 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 15 Aug 2021 13:32:56 +0300 Subject: [PATCH 30/32] Removed DenseHashMap, DenseHashSet --- src/Common/DenseHashMap.h | 29 ------ src/Common/DenseHashSet.h | 25 ----- src/Common/SparseHashMap.h | 1 - src/Core/NamesAndTypes.cpp | 12 +-- src/Storages/MergeTree/IMergeTreeReader.cpp | 9 +- src/Storages/MergeTree/IMergeTreeReader.h | 5 +- src/Storages/StorageInMemoryMetadata.cpp | 102 +++++++++++--------- 7 files changed, 72 insertions(+), 111 deletions(-) delete mode 100644 src/Common/DenseHashMap.h delete mode 100644 src/Common/DenseHashSet.h diff --git a/src/Common/DenseHashMap.h b/src/Common/DenseHashMap.h deleted file mode 100644 index 9ac21c82676..00000000000 --- a/src/Common/DenseHashMap.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once -#include - -/// DenseHashMap is a wrapper for google::dense_hash_map. -/// Some hacks are needed to make it work in "Arcadia". -/// "Arcadia" is a proprietary monorepository in Yandex. -/// It uses slightly changed version of sparsehash with a different set of hash functions (which we don't need). -/// Those defines are needed to make it compile. 
-#if defined(ARCADIA_BUILD) -#define HASH_FUN_H -template -struct THash; -#endif - -#include - -#if !defined(ARCADIA_BUILD) - template , - class EqualKey = std::equal_to, - class Alloc = google::libc_allocator_with_realloc>> - using DenseHashMap = google::dense_hash_map; -#else - template , - class EqualKey = std::equal_to, - class Alloc = google::sparsehash::libc_allocator_with_realloc>> - using DenseHashMap = google::sparsehash::dense_hash_map; - - #undef THash -#endif diff --git a/src/Common/DenseHashSet.h b/src/Common/DenseHashSet.h deleted file mode 100644 index e8c06f36aa3..00000000000 --- a/src/Common/DenseHashSet.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -/// DenseHashSet is a wrapper for google::dense_hash_set. -/// See comment in DenseHashMap.h -#if defined(ARCADIA_BUILD) -#define HASH_FUN_H -template -struct THash; -#endif - -#include - -#if !defined(ARCADIA_BUILD) - template , - class EqualKey = std::equal_to, - class Alloc = google::libc_allocator_with_realloc> - using DenseHashSet = google::dense_hash_set; -#else - template , - class EqualKey = std::equal_to, - class Alloc = google::sparsehash::libc_allocator_with_realloc> - using DenseHashSet = google::sparsehash::dense_hash_set; - - #undef THash -#endif diff --git a/src/Common/SparseHashMap.h b/src/Common/SparseHashMap.h index f01fc633d84..0f86cc13612 100644 --- a/src/Common/SparseHashMap.h +++ b/src/Common/SparseHashMap.h @@ -1,7 +1,6 @@ #pragma once /// SparseHashMap is a wrapper for google::sparse_hash_map. 
-/// See comment in DenseHashMap.h #if defined(ARCADIA_BUILD) #define HASH_FUN_H template diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index 54f83fc13fc..b47f5a6823b 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,7 +7,6 @@ #include #include #include -#include namespace DB @@ -163,8 +163,7 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const { /// NOTE: It's better to make a map in `IStorage` than to create it here every time again. - DenseHashMap types; - types.set_empty_key(StringRef()); + HashMapWithSavedHash types; for (const auto & column : *this) types[column.name] = &column.type; @@ -172,10 +171,11 @@ NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const NamesAndTypesList res; for (const String & name : names) { - auto it = types.find(name); + const auto * it = types.find(name); if (it == types.end()) - throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN); - res.emplace_back(name, *it->second); + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "No column {}", name); + + res.emplace_back(name, *it->getMapped()); } return res; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 5378b84a5d0..d659259e1a9 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -48,7 +48,6 @@ IMergeTreeReader::IMergeTreeReader( part_columns = Nested::collect(part_columns); } - columns_from_part.set_empty_key(StringRef()); for (const auto & column_from_part : part_columns) columns_from_part[column_from_part.name] = &column_from_part.type; } @@ -213,7 +212,7 @@ NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & requ { auto name_in_storage = required_column.getNameInStorage(); - 
decltype(columns_from_part.begin()) it; + ColumnsFromPart::ConstLookupResult it; if (alter_conversions.isColumnRenamed(name_in_storage)) { String old_name = alter_conversions.getColumnOldName(name_in_storage); @@ -227,7 +226,7 @@ NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & requ if (it == columns_from_part.end()) return required_column; - const auto & type = *it->second; + const DataTypePtr & type = *it->getMapped(); if (required_column.isSubcolumn()) { auto subcolumn_name = required_column.getSubcolumnName(); @@ -236,10 +235,10 @@ NameAndTypePair IMergeTreeReader::getColumnFromPart(const NameAndTypePair & requ if (!subcolumn_type) return required_column; - return {String(it->first), subcolumn_name, type, subcolumn_type}; + return {String(it->getKey()), subcolumn_name, type, subcolumn_type}; } - return {String(it->first), type}; + return {String(it->getKey()), type}; } void IMergeTreeReader::performRequiredConversions(Columns & res_columns) diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 8d80719efaf..696cc2f105b 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -95,7 +95,8 @@ private: /// Actual data type of columns in part - DenseHashMap columns_from_part; + using ColumnsFromPart = HashMapWithSavedHash; + ColumnsFromPart columns_from_part; }; } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 91f69cdac7d..5183b925141 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -1,7 +1,7 @@ #include -#include -#include +#include +#include #include #include #include @@ -320,8 +320,7 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns( { Block res; - DenseHashMap virtuals_map; - virtuals_map.set_empty_key(StringRef()); + HashMapWithSavedHash 
virtuals_map; /// Virtual columns must be appended after ordinary, because user can /// override them. @@ -335,9 +334,9 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns( { res.insert({column->type->createColumn(), column->type, column->name}); } - else if (auto it = virtuals_map.find(name); it != virtuals_map.end()) + else if (auto * it = virtuals_map.find(name); it != virtuals_map.end()) { - const auto & type = *it->second; + const auto & type = *it->getMapped(); res.insert({type->createColumn(), type, name}); } else @@ -470,8 +469,8 @@ bool StorageInMemoryMetadata::hasSelectQuery() const namespace { - using NamesAndTypesMap = DenseHashMap; - using UniqueStrings = DenseHashSet; + using NamesAndTypesMap = HashMapWithSavedHash; + using UniqueStrings = HashSetWithSavedHash; String listOfColumns(const NamesAndTypesList & available_columns) { @@ -488,20 +487,12 @@ namespace NamesAndTypesMap getColumnsMap(const NamesAndTypesList & columns) { NamesAndTypesMap res; - res.set_empty_key(StringRef()); for (const auto & column : columns) res.insert({column.name, column.type.get()}); return res; } - - UniqueStrings initUniqueStrings() - { - UniqueStrings strings; - strings.set_empty_key(StringRef()); - return strings; - } } void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const @@ -514,11 +505,12 @@ void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTy } const auto virtuals_map = getColumnsMap(virtuals); - auto unique_names = initUniqueStrings(); + UniqueStrings unique_names; for (const auto & name : column_names) { - bool has_column = getColumns().hasColumnOrSubcolumn(ColumnsDescription::AllPhysical, name) || virtuals_map.count(name); + bool has_column = getColumns().hasColumnOrSubcolumn(ColumnsDescription::AllPhysical, name) + || virtuals_map.find(name) != nullptr; if (!has_column) { @@ -540,23 +532,31 @@ void StorageInMemoryMetadata::check(const 
NamesAndTypesList & provided_columns) const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); const auto columns_map = getColumnsMap(available_columns); - auto unique_names = initUniqueStrings(); + UniqueStrings unique_names; + for (const NameAndTypePair & column : provided_columns) { - auto it = columns_map.find(column.name); + const auto * it = columns_map.find(column.name); if (columns_map.end() == it) throw Exception( - "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column with name {}. There are columns: {}", + column.name, + listOfColumns(available_columns)); - if (!column.type->equals(*it->second)) + if (!column.type->equals(*it->getMapped())) throw Exception( - "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type " - + column.type->getName(), - ErrorCodes::TYPE_MISMATCH); + ErrorCodes::TYPE_MISMATCH, + "Type mismatch for column {}. Column has type {}, got type {}", + column.name, + it->getMapped()->getName(), + column.type->getName()); if (unique_names.end() != unique_names.find(column.name)) - throw Exception("Column " + column.name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); + throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE, + "Column {} queried more than once", + column.name); + unique_names.insert(column.name); } } @@ -572,26 +572,38 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, "Empty list of columns queried. 
There are columns: " + listOfColumns(available_columns), ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); - auto unique_names = initUniqueStrings(); + UniqueStrings unique_names; + for (const String & name : column_names) { - auto it = provided_columns_map.find(name); + const auto * it = provided_columns_map.find(name); if (provided_columns_map.end() == it) continue; - auto jt = available_columns_map.find(name); + const auto * jt = available_columns_map.find(name); if (available_columns_map.end() == jt) throw Exception( - "There is no column with name " + name + ". There are columns: " + listOfColumns(available_columns), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column with name {}. There are columns: {}", + name, + listOfColumns(available_columns)); - if (!it->second->equals(*jt->second)) + const auto & provided_column_type = *it->getMapped(); + const auto & available_column_type = *jt->getMapped(); + + if (!provided_column_type.equals(available_column_type)) throw Exception( - "Type mismatch for column " + name + ". Column has type " + jt->second->getName() + ", got type " + it->second->getName(), - ErrorCodes::TYPE_MISMATCH); + ErrorCodes::TYPE_MISMATCH, + "Type mismatch for column {}. Column has type {}, got type {}", + name, + /* "Column has type": entry from available_columns_map */ available_column_type.getName(), + /* "got type": entry from provided_columns_map */ provided_column_type.getName()); if (unique_names.end() != unique_names.find(name)) - throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); + throw Exception(ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE, + "Column {} queried more than once", + name); + unique_names.insert(name); } } @@ -612,17 +624,21 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const names_in_block.insert(column.name); - auto it = columns_map.find(column.name); + const auto * it = columns_map.find(column.name); if (columns_map.end() == it) throw Exception( - "There is no column with name " + column.name + ". 
There are columns: " + listOfColumns(available_columns), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column with name {}. There are columns: {}", + column.name, + listOfColumns(available_columns)); - if (!column.type->equals(*it->second)) + if (!column.type->equals(*it->getMapped())) throw Exception( - "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type " - + column.type->getName(), - ErrorCodes::TYPE_MISMATCH); + ErrorCodes::TYPE_MISMATCH, + "Type mismatch for column {}. Column has type {}, got type {}", + column.name, + it->getMapped()->getName(), + column.type->getName()); } if (need_all && names_in_block.size() < columns_map.size()) From 404eac198684bf08273dfc91743c7c9cadb17534 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 15 Aug 2021 13:52:44 +0300 Subject: [PATCH 31/32] Fixed tests --- ...map_add_map_subtract_on_map_type.reference | 4 +-- ...01925_map_populate_series_on_map.reference | 34 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference index 304f7407cf5..de34b856130 100644 --- a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference @@ -47,9 +47,9 @@ {'b':2} Map(Enum8(\'a\' = 1, \'b\' = 2), Int64) {'00000000-89ab-cdef-0123-456789abcdef':2} Map(UUID, Int64) {'11111111-89ab-cdef-0123-456789abcdef':4} Map(UUID, Int64) -{1:0,2:0} Map(UInt8,UInt64) +{1:0,2:0} Map(UInt8, UInt64) {1:18446744073709551615,2:18446744073709551615} Map(UInt8, UInt64) -{1:-1,2:-1} Map(UInt8,Int64) +{1:-1,2:-1} Map(UInt8, Int64) {1:-1.0999999761581423,2:0} Map(UInt8, Float64) {1:-1,2:-1} Map(UInt8, Int64) {1:-2,2:-2,3:1} Map(UInt8, Int64) diff --git 
a/tests/queries/0_stateless/01925_map_populate_series_on_map.reference b/tests/queries/0_stateless/01925_map_populate_series_on_map.reference index 235a227f548..fd3d3b2450d 100644 --- a/tests/queries/0_stateless/01925_map_populate_series_on_map.reference +++ b/tests/queries/0_stateless/01925_map_populate_series_on_map.reference @@ -29,39 +29,39 @@ select mapPopulateSeries(m, n) from map_test; {1:1,2:0,3:0,4:0,5:2,6:0} drop table map_test; select mapPopulateSeries(map(toUInt8(1), toUInt8(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(UInt8,UInt8) +{1:1,2:1} Map(UInt8, UInt8) select mapPopulateSeries(map(toUInt16(1), toUInt16(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(UInt16,UInt16) +{1:1,2:1} Map(UInt16, UInt16) select mapPopulateSeries(map(toUInt32(1), toUInt32(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(UInt32,UInt32) +{1:1,2:1} Map(UInt32, UInt32) select mapPopulateSeries(map(toUInt64(1), toUInt64(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(UInt64,UInt64) +{1:1,2:1} Map(UInt64, UInt64) select mapPopulateSeries(map(toUInt128(1), toUInt128(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(UInt128,UInt128) +{1:1,2:1} Map(UInt128, UInt128) select mapPopulateSeries(map(toUInt256(1), toUInt256(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(UInt256,UInt256) +{1:1,2:1} Map(UInt256, UInt256) select mapPopulateSeries(map(toInt8(1), toInt8(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(Int16,Int16) +{1:1,2:1} Map(Int16, Int16) select mapPopulateSeries(map(toInt16(1), toInt16(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(Int16,Int16) +{1:1,2:1} Map(Int16, Int16) select mapPopulateSeries(map(toInt32(1), toInt32(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(Int32,Int32) +{1:1,2:1} Map(Int32, Int32) select mapPopulateSeries(map(toInt64(1), toInt64(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(Int64,Int64) +{1:1,2:1} Map(Int64, Int64) select mapPopulateSeries(map(toInt128(1), toInt128(1), 2, 1)) as res, toTypeName(res); 
-{1:1,2:1} Map(Int128,Int128) +{1:1,2:1} Map(Int128, Int128) select mapPopulateSeries(map(toInt256(1), toInt256(1), 2, 1)) as res, toTypeName(res); -{1:1,2:1} Map(Int256,Int256) +{1:1,2:1} Map(Int256, Int256) select mapPopulateSeries(map(toInt8(-10), toInt8(1), 2, 1)) as res, toTypeName(res); -{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int16,Int16) +{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int16, Int16) select mapPopulateSeries(map(toInt16(-10), toInt16(1), 2, 1)) as res, toTypeName(res); -{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int16,Int16) +{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int16, Int16) select mapPopulateSeries(map(toInt32(-10), toInt32(1), 2, 1)) as res, toTypeName(res); -{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int32,Int32) +{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int32, Int32) select mapPopulateSeries(map(toInt64(-10), toInt64(1), 2, 1)) as res, toTypeName(res); -{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int64,Int64) +{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0,-4:0,-3:0,-2:0,-1:0,0:0,1:0,2:1} Map(Int64, Int64) select mapPopulateSeries(map(toInt64(-10), toInt64(1), 2, 1), toInt64(-5)) as res, toTypeName(res); -{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0} Map(Int64,Int64) +{-10:1,-9:0,-8:0,-7:0,-6:0,-5:0} Map(Int64, Int64) select mapPopulateSeries(); -- { serverError 42 } select mapPopulateSeries('asdf'); -- { serverError 43 } select mapPopulateSeries(map('1', 1, '2', 1)) as res, toTypeName(res); -- { serverError 43 } From 6c4c3df96e41425185beb0c471a8dde0ce6f25a7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 15 Aug 2021 16:22:15 +0300 Subject: [PATCH 32/32] Auto version update to [21.9.1.7770] [54454] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt 
b/cmake/autogenerated_versions.txt index 18072566d04..2435335f669 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54454) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH f48c5af90c2ad51955d1ee3b6b05d006b03e4238) -SET(VERSION_DESCRIBE v21.9.1.1-prestable) -SET(VERSION_STRING 21.9.1.1) +SET(VERSION_GITHASH f063e44131a048ba2d9af8075f03700fd5ec3e69) +SET(VERSION_DESCRIBE v21.9.1.7770-prestable) +SET(VERSION_STRING 21.9.1.7770) # end of autochange