Fixed tests

This commit is contained in:
Maksim Kita 2022-09-06 18:46:30 +02:00
parent ca93ee7479
commit 4f68305dbe
20 changed files with 153 additions and 165 deletions

View File

@ -27,7 +27,7 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat
buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n"; buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n";
getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSSIONS\n"; buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSIONS\n";
getJoinExpressionsNode()->dumpTreeImpl(buffer, format_state, indent + 4); getJoinExpressionsNode()->dumpTreeImpl(buffer, format_state, indent + 4);
} }

View File

@ -27,7 +27,7 @@ namespace DB
* 1. regexp variant: SELECT matcher EXCEPT ('regexp'). * 1. regexp variant: SELECT matcher EXCEPT ('regexp').
* 2. column names list variant: SELECT matcher EXCEPT (column_name_1, ...). * 2. column names list variant: SELECT matcher EXCEPT (column_name_1, ...).
* *
* 3. REPLACE transfomer: * 3. REPLACE transformer:
* REPLACE transformer applies similar transformation as APPLY transformer, but only for expressions * REPLACE transformer applies similar transformation as APPLY transformer, but only for expressions
* that match replacement expression name. * that match replacement expression name.
* *
@ -243,7 +243,7 @@ using ReplaceColumnTransformerNodePtr = std::shared_ptr<ReplaceColumnTransformer
* Strict replace column transformer must use all replacements during matched nodes transformation. * Strict replace column transformer must use all replacements during matched nodes transformation.
* *
* Example: * Example:
* REATE TABLE test_table (id UInt64, value String) ENGINE=TinyLog; * CREATE TABLE test_table (id UInt64, value String) ENGINE=TinyLog;
* SELECT * REPLACE STRICT (1 AS id, 2 AS value_1) FROM test_table; * SELECT * REPLACE STRICT (1 AS id, 2 AS value_1) FROM test_table;
* Such query will throw exception because column with name value_1 was not matched by strict REPLACE transformer. * Such query will throw exception because column with name value_1 was not matched by strict REPLACE transformer.
*/ */

View File

@ -15,7 +15,7 @@ using AggregateFunctionPtr = std::shared_ptr<const IAggregateFunction>;
/** Function node represents function in query tree. /** Function node represents function in query tree.
* Function syntax: function_name(parameter_1, ...)(argument_1, ...). * Function syntax: function_name(parameter_1, ...)(argument_1, ...).
* If funciton does not have parameters its syntax is function_name(argument_1, ...). * If function does not have parameters its syntax is function_name(argument_1, ...).
* If function does not have arguments its syntax is function_name(). * If function does not have arguments its syntax is function_name().
* *
* In query tree function parameters and arguments are represented by ListNode. * In query tree function parameters and arguments are represented by ListNode.
@ -130,7 +130,7 @@ public:
/** Resolve function node as non aggregate function. /** Resolve function node as non aggregate function.
* It is important that function name is updated with resolved function name. * It is important that function name is updated with resolved function name.
* Main motiviation for this is query tree optimizations. * Main motivation for this is query tree optimizations.
* Assume we have `multiIf` function with single argument, it can be converted to `if` function. * Assume we have `multiIf` function with single argument, it can be converted to `if` function.
* Function name must be updated accordingly. * Function name must be updated accordingly.
*/ */
@ -138,7 +138,7 @@ public:
/** Resolve function node as aggregate function. /** Resolve function node as aggregate function.
* It is important that function name is updated with resolved function name. * It is important that function name is updated with resolved function name.
* Main motiviation for this is query tree optimizations. * Main motivation for this is query tree optimizations.
*/ */
void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value); void resolveAsAggregateFunction(AggregateFunctionPtr aggregate_function_value, DataTypePtr result_type_value);

View File

@ -80,7 +80,7 @@ public:
} }
/** Get result type of query tree node that can be used as part of expression. /** Get result type of query tree node that can be used as part of expression.
* If node does not support this method exception is throwed. * If node does not support this method exception is thrown.
* TODO: Maybe this can be a part of ExpressionQueryTreeNode. * TODO: Maybe this can be a part of ExpressionQueryTreeNode.
*/ */
virtual DataTypePtr getResultType() const virtual DataTypePtr getResultType() const

View File

@ -10,7 +10,7 @@
namespace DB namespace DB
{ {
/** Identifier constists from identifier parts. /** Identifier consists from identifier parts.
* Each identifier part is arbitrary long sequence of digits, underscores, lowercase and uppercase letters. * Each identifier part is arbitrary long sequence of digits, underscores, lowercase and uppercase letters.
* Example: a, a.b, a.b.c. * Example: a, a.b, a.b.c.
*/ */
@ -26,21 +26,21 @@ public:
{ {
} }
/// Create Identifier from parts /// Create Identifier from parts
explicit Identifier(std::vector<std::string> && parts_) explicit Identifier(std::vector<std::string> && parts_)
: parts(std::move(parts_)) : parts(std::move(parts_))
, full_name(boost::algorithm::join(parts, ".")) , full_name(boost::algorithm::join(parts, "."))
{ {
} }
/// Create Identifier from full name, full name is splitted with '.' as separator. /// Create Identifier from full name, full name is split with '.' as separator.
explicit Identifier(const std::string & full_name_) explicit Identifier(const std::string & full_name_)
: full_name(full_name_) : full_name(full_name_)
{ {
boost::split(parts, full_name, [](char c) { return c == '.'; }); boost::split(parts, full_name, [](char c) { return c == '.'; });
} }
/// Create Identifier from full name, full name is splitted with '.' as separator. /// Create Identifier from full name, full name is split with '.' as separator.
explicit Identifier(std::string && full_name_) explicit Identifier(std::string && full_name_)
: full_name(std::move(full_name_)) : full_name(std::move(full_name_))
{ {

View File

@ -70,12 +70,12 @@ void JoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si
buffer << '\n' << std::string(indent + 2, ' ') << "LEFT TABLE EXPRESSION\n"; buffer << '\n' << std::string(indent + 2, ' ') << "LEFT TABLE EXPRESSION\n";
getLeftTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); getLeftTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
buffer << '\n' << std::string(indent + 2, ' ') << "RIGHT TABLE EXPRESSSION\n"; buffer << '\n' << std::string(indent + 2, ' ') << "RIGHT TABLE EXPRESSION\n";
getRightTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); getRightTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
if (getJoinExpression()) if (getJoinExpression())
{ {
buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSSION\n"; buffer << '\n' << std::string(indent + 2, ' ') << "JOIN EXPRESSION\n";
getJoinExpression()->dumpTreeImpl(buffer, format_state, indent + 4); getJoinExpression()->dumpTreeImpl(buffer, format_state, indent + 4);
} }
} }

View File

@ -42,7 +42,7 @@ namespace DB
* Example: SELECT test_table.* FROM test_table. * Example: SELECT test_table.* FROM test_table.
* Example: SELECT a.* FROM test_table AS a. * Example: SELECT a.* FROM test_table AS a.
* *
* Additionaly each matcher can contain transformers, check ColumnTransformers.h. * Additionally each matcher can contain transformers, check ColumnTransformers.h.
* In query tree matchers column transformers are represented as ListNode. * In query tree matchers column transformers are represented as ListNode.
*/ */
enum class MatcherNodeType enum class MatcherNodeType

View File

@ -142,7 +142,7 @@ namespace ErrorCodes
* 1. Try to resolve identifier in expression context. * 1. Try to resolve identifier in expression context.
* 2. Try to resolve identifier in function context, if it is allowed. Example: SELECT func(arguments); Here func identifier cannot be resolved in function context * 2. Try to resolve identifier in function context, if it is allowed. Example: SELECT func(arguments); Here func identifier cannot be resolved in function context
* because query projection does not support that. * because query projection does not support that.
* 3. Try to resolve identifier in talbe context, if it is allowed. Example: SELECT table; Here table identifier cannot be resolved in function context * 3. Try to resolve identifier in table context, if it is allowed. Example: SELECT table; Here table identifier cannot be resolved in function context
* because query projection does not support that. * because query projection does not support that.
* *
* TODO: This was not supported properly before, because matchers could not be resolved from aliases. * TODO: This was not supported properly before, because matchers could not be resolved from aliases.
@ -167,13 +167,13 @@ namespace ErrorCodes
* *
* Additional rules about identifier binding. * Additional rules about identifier binding.
* Bind for identifier to entity means that identifier first part match some node during analysis. * Bind for identifier to entity means that identifier first part match some node during analysis.
* If other parts of identifier cannot be resolved in that node, exception must be throwed. * If other parts of identifier cannot be resolved in that node, exception must be thrown.
* *
* Example: * Example:
* CREATE TABLE test_table (id UInt64, compound_value Tuple(value UInt64)) ENGINE=TinyLog; * CREATE TABLE test_table (id UInt64, compound_value Tuple(value UInt64)) ENGINE=TinyLog;
* SELECT compound_value.value, 1 AS compound_value FROM test_table; * SELECT compound_value.value, 1 AS compound_value FROM test_table;
* Identifier first part compound_value bound to entity with alias compound_value, but nested identifier part cannot be resolved from entity, * Identifier first part compound_value bound to entity with alias compound_value, but nested identifier part cannot be resolved from entity,
* lookup should not be continued, and exception must be throwed because if lookup continues that way identifier can be resolved from join tree. * lookup should not be continued, and exception must be thrown because if lookup continues that way identifier can be resolved from join tree.
* *
* TODO: This was not supported properly before analyzer because nested identifier could not be resolved from alias. * TODO: This was not supported properly before analyzer because nested identifier could not be resolved from alias.
* *
@ -181,7 +181,7 @@ namespace ErrorCodes
* CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog; * CREATE TABLE test_table (id UInt64, value UInt64) ENGINE=TinyLog;
* WITH cast(('Value'), 'Tuple (value UInt64)') AS value SELECT (SELECT value FROM test_table); * WITH cast(('Value'), 'Tuple (value UInt64)') AS value SELECT (SELECT value FROM test_table);
* Identifier first part value bound to test_table column value, but nested identifier part cannot be resolved from it, * Identifier first part value bound to test_table column value, but nested identifier part cannot be resolved from it,
* lookup should not be continued, and exception must be throwed because if lookup continues identifier can be resolved from parent scope. * lookup should not be continued, and exception must be thrown because if lookup continues identifier can be resolved from parent scope.
* *
* TODO: Add expression name into query tree node. Example: SELECT plus(1, 1). Result: SELECT 2. Expression name of constant node should be 2. * TODO: Add expression name into query tree node. Example: SELECT plus(1, 1). Result: SELECT 2. Expression name of constant node should be 2.
* TODO: Update exception messages * TODO: Update exception messages
@ -366,7 +366,7 @@ struct IdentifierResolveSettings
/// Allow to check join tree during identifier resolution /// Allow to check join tree during identifier resolution
bool allow_to_check_join_tree = true; bool allow_to_check_join_tree = true;
/// Allow to check CTEs during table identifer resolution /// Allow to check CTEs during table identifier resolution
bool allow_to_check_cte = true; bool allow_to_check_cte = true;
/// Allow to check database catalog during table identifier resolution /// Allow to check database catalog during table identifier resolution
@ -1125,7 +1125,7 @@ void QueryAnalyzer::validateTableExpressionModifiers(QueryTreeNodePtr & table_ex
} }
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Table expresion modifiers {} are not supported for subquery {}. In scope {}", "Table expression modifiers {} are not supported for subquery {}. In scope {}",
table_expression_modifiers_error_message, table_expression_modifiers_error_message,
table_expression_node->formatASTForErrorMessage(), table_expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage()); scope.scope_node->formatASTForErrorMessage());
@ -1679,7 +1679,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
{ {
if (left_resolved_identifier && right_resolved_identifier) if (left_resolved_identifier && right_resolved_identifier)
throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER, throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER,
"JOIN {} ambigious identifier {}. In scope {}", "JOIN {} ambiguous identifier {}. In scope {}",
table_expression_node->formatASTForErrorMessage(), table_expression_node->formatASTForErrorMessage(),
identifier_lookup.dump(), identifier_lookup.dump(),
scope.scope_node->formatASTForErrorMessage()); scope.scope_node->formatASTForErrorMessage());
@ -1737,7 +1737,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
else else
{ {
throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER, throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER,
"JOIN {} ambigious identifier {}. In scope {}", "JOIN {} ambiguous identifier {}. In scope {}",
table_expression_node->formatASTForErrorMessage(), table_expression_node->formatASTForErrorMessage(),
identifier_lookup.dump(), identifier_lookup.dump(),
scope.scope_node->formatASTForErrorMessage()); scope.scope_node->formatASTForErrorMessage());
@ -1878,7 +1878,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident
* 2. Try to resolve identifier from table columns. * 2. Try to resolve identifier from table columns.
* 3. If there is no FROM section return nullptr. * 3. If there is no FROM section return nullptr.
* 4. If identifier is in table lookup context, check if it has 1 or 2 parts, otherwise throw exception. * 4. If identifier is in table lookup context, check if it has 1 or 2 parts, otherwise throw exception.
* If identifer has 2 parts try to match it with database_name and table_name. * If identifier has 2 parts try to match it with database_name and table_name.
* If identifier has 1 part try to match it with table_name, then try to match it with table alias. * If identifier has 1 part try to match it with table_name, then try to match it with table alias.
* 5. If identifier is in expression lookup context, we first need to bind identifier to some table column using identifier first part. * 5. If identifier is in expression lookup context, we first need to bind identifier to some table column using identifier first part.
* Start with identifier first part, if it match some column name in table try to get column with full identifier name. * Start with identifier first part, if it match some column name in table try to get column with full identifier name.
@ -2005,7 +2005,7 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
* in database catalog. * in database catalog.
* *
* Same is not done for functions because function resolution is more complex, and in case of aggregate functions requires not only name * Same is not done for functions because function resolution is more complex, and in case of aggregate functions requires not only name
* but also argument types, it is responsiblity of resolve function method to handle resolution of function name. * but also argument types, it is responsibility of resolve function method to handle resolution of function name.
* *
* 9. If identifier was not resolved remove it from identifier_lookup_to_resolve_status table. * 9. If identifier was not resolved remove it from identifier_lookup_to_resolve_status table.
* *
@ -2596,7 +2596,7 @@ QueryTreeNodePtr QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node,
* scope - lambda scope. It is client responsibility to create it. * scope - lambda scope. It is client responsibility to create it.
* *
* Resolve steps: * Resolve steps:
* 1. Valide arguments. * 1. Validate arguments.
* 2. Register lambda in lambdas in resolve process. This is necessary to prevent recursive lambda resolving. * 2. Register lambda in lambdas in resolve process. This is necessary to prevent recursive lambda resolving.
* 3. Initialize scope with lambda aliases. * 3. Initialize scope with lambda aliases.
* 4. Validate lambda argument names, and scope expressions. * 4. Validate lambda argument names, and scope expressions.
@ -2966,7 +2966,7 @@ void QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, IdentifierResolveSc
} }
/** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function. /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function.
* Then each lambda arguments are initalized with columns, where column source is lambda. * Then each lambda arguments are initialized with columns, where column source is lambda.
* This information is important for later steps of query processing. * This information is important for later steps of query processing.
* Example: SELECT arrayMap(x -> x + 1, [1, 2, 3]). * Example: SELECT arrayMap(x -> x + 1, [1, 2, 3]).
* lambda node x -> x + 1 identifier x is resolved as column where source is lambda node. * lambda node x -> x + 1 identifier x is resolved as column where source is lambda node.
@ -3122,7 +3122,7 @@ void QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, IdentifierResolveSc
* 2. Call specific resolve method depending on node type. * 2. Call specific resolve method depending on node type.
* *
* If allow_table_expression = true and node is query node, then it is not evaluated as scalar subquery. * If allow_table_expression = true and node is query node, then it is not evaluated as scalar subquery.
* Althought if node is identifier that is resolved into query node that query is evaluated as scalar subquery. * Although if node is identifier that is resolved into query node that query is evaluated as scalar subquery.
* SELECT id, (SELECT 1) AS c FROM test_table WHERE a IN c; * SELECT id, (SELECT 1) AS c FROM test_table WHERE a IN c;
* SELECT id FROM test_table WHERE a IN (SELECT 1); * SELECT id FROM test_table WHERE a IN (SELECT 1);
* *

View File

@ -361,7 +361,7 @@ ASTPtr QueryNode::toASTImpl() const
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST()); select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST());
auto result_select_query = std::make_shared<ASTSelectWithUnionQuery>(); auto result_select_query = std::make_shared<ASTSelectWithUnionQuery>();
result_select_query->union_mode = SelectUnionMode::Unspecified; result_select_query->union_mode = SelectUnionMode::UNION_DEFAULT;
auto list_of_selects = std::make_shared<ASTExpressionList>(); auto list_of_selects = std::make_shared<ASTExpressionList>();
list_of_selects->children.push_back(std::move(select_query)); list_of_selects->children.push_back(std::move(select_query));

View File

@ -166,10 +166,14 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr
union_node->setIsSubquery(is_subquery); union_node->setIsSubquery(is_subquery);
union_node->setCTEName(cte_name); union_node->setCTEName(cte_name);
if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT) if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_ALL)
union_node->setUnionMode(SelectUnionMode::INTERSECT); union_node->setUnionMode(SelectUnionMode::INTERSECT_ALL);
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT) else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_DISTINCT)
union_node->setUnionMode(SelectUnionMode::EXCEPT); union_node->setUnionMode(SelectUnionMode::INTERSECT_DISTINCT);
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_ALL)
union_node->setUnionMode(SelectUnionMode::EXCEPT_ALL);
else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT_DISTINCT)
union_node->setUnionMode(SelectUnionMode::EXCEPT_DISTINCT);
else else
throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized"); throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized");
@ -523,7 +527,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
{ {
if (!tables_in_select_query) if (!tables_in_select_query)
{ {
/** If no table is specified in SELECT query we substitude system.one table. /** If no table is specified in SELECT query we substitute system.one table.
* SELECT * FROM system.one; * SELECT * FROM system.one;
*/ */
Identifier storage_identifier("system.one"); Identifier storage_identifier("system.one");

View File

@ -99,10 +99,25 @@ String UnionNode::getName() const
continue; continue;
auto query_union_mode = union_modes.at(i - 1); auto query_union_mode = union_modes.at(i - 1);
if (query_union_mode == SelectUnionMode::ALL || query_union_mode == SelectUnionMode::DISTINCT)
buffer << " UNION " << toString(query_union_mode); if (query_union_mode == SelectUnionMode::UNION_DEFAULT)
else buffer << "UNION";
buffer << toString(query_union_mode); else if (query_union_mode == SelectUnionMode::UNION_ALL)
buffer << "UNION ALL";
else if (query_union_mode == SelectUnionMode::UNION_DISTINCT)
buffer << "UNION DISTINCT";
else if (query_union_mode == SelectUnionMode::EXCEPT_DEFAULT)
buffer << "EXCEPT";
else if (query_union_mode == SelectUnionMode::EXCEPT_ALL)
buffer << "EXCEPT ALL";
else if (query_union_mode == SelectUnionMode::EXCEPT_DISTINCT)
buffer << "EXCEPT DISTINCT";
else if (query_union_mode == SelectUnionMode::INTERSECT_DEFAULT)
buffer << "INTERSECT";
else if (query_union_mode == SelectUnionMode::INTERSECT_ALL)
buffer << "INTERSECT ALL";
else if (query_union_mode == SelectUnionMode::INTERSECT_DISTINCT)
buffer << "INTERSECT DISTINCT";
} }
return buffer.str(); return buffer.str();
@ -133,12 +148,7 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
table_expression_modifiers->dump(buffer); table_expression_modifiers->dump(buffer);
} }
buffer << ", union_mode: "; buffer << ", union_mode: " << toString(union_mode);
if (union_mode == SelectUnionMode::ALL || union_mode == SelectUnionMode::DISTINCT)
buffer << " UNION " << toString(union_mode);
else
buffer << toString(union_mode);
size_t union_modes_size = union_modes.size(); size_t union_modes_size = union_modes.size();
buffer << '\n' << std::string(indent + 2, ' ') << "UNION MODES " << union_modes_size << '\n'; buffer << '\n' << std::string(indent + 2, ' ') << "UNION MODES " << union_modes_size << '\n';
@ -148,10 +158,7 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
buffer << std::string(indent + 4, ' '); buffer << std::string(indent + 4, ' ');
auto query_union_mode = union_modes[i]; auto query_union_mode = union_modes[i];
if (query_union_mode == SelectUnionMode::ALL || query_union_mode == SelectUnionMode::DISTINCT) buffer << toString(query_union_mode);
buffer << " UNION " << toString(query_union_mode);
else
buffer << toString(query_union_mode);
if (i + 1 != union_modes_size) if (i + 1 != union_modes_size)
buffer << '\n'; buffer << '\n';
@ -208,6 +215,12 @@ ASTPtr UnionNode::toASTImpl() const
{ {
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>(); auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
select_with_union_query->union_mode = union_mode; select_with_union_query->union_mode = union_mode;
if (union_mode != SelectUnionMode::UNION_DEFAULT &&
union_mode != SelectUnionMode::EXCEPT_DEFAULT &&
union_mode != SelectUnionMode::INTERSECT_DEFAULT)
select_with_union_query->is_normalized = true;
select_with_union_query->list_of_modes = union_modes; select_with_union_query->list_of_modes = union_modes;
select_with_union_query->set_of_modes = union_modes_set; select_with_union_query->set_of_modes = union_modes_set;
select_with_union_query->children.push_back(getQueriesNode()->toAST()); select_with_union_query->children.push_back(getQueriesNode()->toAST());

View File

@ -39,7 +39,6 @@ public:
/// This is a temporary table for transferring to remote servers for distributed query processing. /// This is a temporary table for transferring to remote servers for distributed query processing.
StoragePtr table; StoragePtr table;
private:
/// The source is obtained using the InterpreterSelectQuery subquery. /// The source is obtained using the InterpreterSelectQuery subquery.
std::unique_ptr<QueryPlan> source; std::unique_ptr<QueryPlan> source;
}; };

View File

@ -416,75 +416,6 @@ bool TableJoin::needStreamWithNonJoinedRows() const
return isRightOrFull(kind()); return isRightOrFull(kind());
} }
static std::optional<String> getDictKeyName(const String & dict_name , ContextPtr context)
{
auto dictionary = context->getExternalDictionariesLoader().getDictionary(dict_name, context);
if (!dictionary)
return {};
if (const auto & structure = dictionary->getStructure(); structure.id)
return structure.id->name;
return {};
}
bool TableJoin::tryInitDictJoin(const Block & sample_block, ContextPtr context)
{
bool allowed_inner = isInner(kind()) && strictness() == JoinStrictness::All;
bool allowed_left = isLeft(kind()) && (strictness() == JoinStrictness::Any ||
strictness() == JoinStrictness::All ||
strictness() == JoinStrictness::Semi ||
strictness() == JoinStrictness::Anti);
/// Support ALL INNER, [ANY | ALL | SEMI | ANTI] LEFT
if (!allowed_inner && !allowed_left)
return false;
if (clauses.size() != 1 || clauses[0].key_names_right.size() != 1)
return false;
const auto & right_key = getOnlyClause().key_names_right[0];
/// TODO: support 'JOIN ... ON expr(dict_key) = table_key'
auto it_key = original_names.find(right_key);
if (it_key == original_names.end())
return false;
if (!right_storage_dictionary)
return false;
auto dict_name = right_storage_dictionary->getDictionaryName();
auto dict_key = getDictKeyName(dict_name, context);
if (!dict_key.has_value() || *dict_key != it_key->second)
return false; /// JOIN key != Dictionary key
Names src_names;
NamesAndTypesList dst_columns;
for (const auto & col : sample_block)
{
if (col.name == right_key)
continue; /// do not extract key column
auto it = original_names.find(col.name);
if (it != original_names.end())
{
String original = it->second;
src_names.push_back(original);
dst_columns.push_back({col.name, col.type});
}
else
{
/// Can't extract column from dictionary table
/// TODO: Sometimes it should be possible to recunstruct required column,
/// e.g. if it's an expression depending on dictionary attributes
return false;
}
}
dictionary_reader = std::make_shared<DictionaryReader>(dict_name, src_names, dst_columns, context);
return true;
}
static void renameIfNeeded(String & name, const NameToNameMap & renames) static void renameIfNeeded(String & name, const NameToNameMap & renames)
{ {
if (const auto it = renames.find(name); it != renames.end()) if (const auto it = renames.find(name); it != renames.end())

View File

@ -8,16 +8,24 @@ const char * toString(SelectUnionMode mode)
{ {
switch (mode) switch (mode)
{ {
case SelectUnionMode::ALL: case SelectUnionMode::UNION_DEFAULT:
return "ALL"; return "UNION_DEFAULT";
case SelectUnionMode::DISTINCT: case SelectUnionMode::UNION_ALL:
return "DISTINCT"; return "UNION_ALL";
case SelectUnionMode::EXCEPT: case SelectUnionMode::UNION_DISTINCT:
return "EXCEPT"; return "UNION_DISTINCT";
case SelectUnionMode::INTERSECT: case SelectUnionMode::EXCEPT_DEFAULT:
return "INTERSECT"; return "EXCEPT_DEFAULT";
case SelectUnionMode::Unspecified: case SelectUnionMode::EXCEPT_ALL:
return "Unspecified"; return "EXCEPT_ALL";
case SelectUnionMode::EXCEPT_DISTINCT:
return "EXCEPT_DISTINCT";
case SelectUnionMode::INTERSECT_DEFAULT:
return "INTERSECT_DEFAULT";
case SelectUnionMode::INTERSECT_ALL:
return "INTERSECT_ALL";
case SelectUnionMode::INTERSECT_DISTINCT:
return "INTERSECT_DISTINCT";
} }
} }

View File

@ -93,7 +93,6 @@ namespace ErrorCodes
* TODO: Support projections * TODO: Support projections
* TODO: Support read in order optimization * TODO: Support read in order optimization
* TODO: UNION storage limits * TODO: UNION storage limits
* TODO: Interpreter resources
* TODO: Support max streams * TODO: Support max streams
* TODO: Support ORDER BY read in order optimization * TODO: Support ORDER BY read in order optimization
* TODO: Support GROUP BY read in order optimization * TODO: Support GROUP BY read in order optimization
@ -581,7 +580,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, const SelectQ
if (select_query_options.is_subquery) if (select_query_options.is_subquery)
return; return;
SubqueriesForSets subqueries_for_sets; PreparedSets::SubqueriesForSets subqueries_for_sets;
const auto & subquery_node_to_sets = planner_context->getGlobalPlannerContext()->getSubqueryNodesForSets(); const auto & subquery_node_to_sets = planner_context->getGlobalPlannerContext()->getSubqueryNodesForSets();
for (auto [key, subquery_node_for_set] : subquery_node_to_sets) for (auto [key, subquery_node_for_set] : subquery_node_to_sets)
@ -602,9 +601,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(QueryPlan & query_plan, const SelectQ
subqueries_for_sets.emplace(key, std::move(subquery_for_set)); subqueries_for_sets.emplace(key, std::move(subquery_for_set));
} }
const Settings & settings = planner_context->getQueryContext()->getSettingsRef(); addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), planner_context->getQueryContext());
SizeLimits limits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);
addCreatingSetsStep(query_plan, std::move(subqueries_for_sets), limits, planner_context->getQueryContext());
} }
} }
@ -649,7 +646,9 @@ void Planner::buildQueryPlanIfNeeded()
if (auto * union_query_tree = query_tree->as<UnionNode>()) if (auto * union_query_tree = query_tree->as<UnionNode>())
{ {
auto union_mode = union_query_tree->getUnionMode(); auto union_mode = union_query_tree->getUnionMode();
if (union_mode == SelectUnionMode::Unspecified) if (union_mode == SelectUnionMode::UNION_DEFAULT ||
union_mode == SelectUnionMode::EXCEPT_DEFAULT ||
union_mode == SelectUnionMode::INTERSECT_DEFAULT)
throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION mode must be initialized"); throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION mode must be initialized");
size_t queries_size = union_query_tree->getQueries().getNodes().size(); size_t queries_size = union_query_tree->getQueries().getNodes().size();
@ -695,37 +694,48 @@ void Planner::buildQueryPlanIfNeeded()
const auto & settings = query_context->getSettingsRef(); const auto & settings = query_context->getSettingsRef();
auto max_threads = settings.max_threads; auto max_threads = settings.max_threads;
if (union_mode == SelectUnionMode::ALL || union_mode == SelectUnionMode::DISTINCT) bool is_distinct = union_mode == SelectUnionMode::UNION_DISTINCT || union_mode == SelectUnionMode::INTERSECT_DISTINCT ||
union_mode == SelectUnionMode::EXCEPT_DISTINCT;
if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT)
{ {
auto union_step = std::make_unique<UnionStep>(std::move(query_plans_streams), max_threads); auto union_step = std::make_unique<UnionStep>(std::move(query_plans_streams), max_threads);
query_plan.unitePlans(std::move(union_step), std::move(query_plans)); query_plan.unitePlans(std::move(union_step), std::move(query_plans));
if (union_query_tree->getUnionMode() == SelectUnionMode::DISTINCT)
{
/// Add distinct transform
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step = std::make_unique<DistinctStep>(
query_plan.getCurrentDataStream(),
limits,
0 /*limit hint*/,
query_plan.getCurrentDataStream().header.getNames(),
false /*pre distinct*/,
settings.optimize_distinct_in_order);
query_plan.addStep(std::move(distinct_step));
}
} }
else if (union_mode == SelectUnionMode::INTERSECT || union_mode == SelectUnionMode::EXCEPT) else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT ||
union_mode == SelectUnionMode::EXCEPT_ALL || union_mode == SelectUnionMode::EXCEPT_DISTINCT)
{ {
IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT; IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::UNKNOWN;
if (union_mode == SelectUnionMode::EXCEPT)
intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT; if (union_mode == SelectUnionMode::INTERSECT_ALL)
intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_ALL;
else if (union_mode == SelectUnionMode::INTERSECT_DISTINCT)
intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT_DISTINCT;
else if (union_mode == SelectUnionMode::EXCEPT_ALL)
intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_ALL;
else if (union_mode == SelectUnionMode::EXCEPT_DISTINCT)
intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT;
auto union_step = std::make_unique<IntersectOrExceptStep>(std::move(query_plans_streams), intersect_or_except_operator, max_threads); auto union_step = std::make_unique<IntersectOrExceptStep>(std::move(query_plans_streams), intersect_or_except_operator, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(query_plans)); query_plan.unitePlans(std::move(union_step), std::move(query_plans));
} }
if (is_distinct)
{
/// Add distinct transform
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step = std::make_unique<DistinctStep>(
query_plan.getCurrentDataStream(),
limits,
0 /*limit hint*/,
query_plan.getCurrentDataStream().header.getNames(),
false /*pre distinct*/,
settings.optimize_distinct_in_order);
query_plan.addStep(std::move(distinct_step));
}
return; return;
} }
@ -1082,6 +1092,7 @@ void Planner::buildQueryPlanIfNeeded()
settings.min_free_disk_space_for_temporary_data, settings.min_free_disk_space_for_temporary_data,
settings.compile_aggregate_expressions, settings.compile_aggregate_expressions,
settings.min_count_to_compile_aggregate_expression, settings.min_count_to_compile_aggregate_expression,
settings.max_block_size,
/* only_merge */ false, /* only_merge */ false,
stats_collecting_params stats_collecting_params
); );
@ -1237,7 +1248,8 @@ void Planner::buildQueryPlanIfNeeded()
settings.remerge_sort_lowered_memory_bytes_ratio, settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort, settings.max_bytes_before_external_sort,
planner_context->getQueryContext()->getTemporaryVolume(), planner_context->getQueryContext()->getTemporaryVolume(),
settings.min_free_disk_space_for_temporary_data); settings.min_free_disk_space_for_temporary_data,
settings.optimize_sorting_by_input_stream_properties);
sorting_step->setStepDescription("Sorting for ORDER BY"); sorting_step->setStepDescription("Sorting for ORDER BY");
query_plan.addStep(std::move(sorting_step)); query_plan.addStep(std::move(sorting_step));
@ -1411,6 +1423,22 @@ void Planner::buildQueryPlanIfNeeded()
query_plan.addStep(std::move(projection_step)); query_plan.addStep(std::move(projection_step));
addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context); addBuildSubqueriesForSetsStepIfNeeded(query_plan, select_query_options, planner_context);
/// Extend lifetime of context, table locks, storages
query_plan.addInterpreterContext(planner_context->getQueryContext());
for (auto & [table_expression, _] : planner_context->getTableExpressionNodeToData())
{
if (auto * table_node = table_expression->as<TableNode>())
{
query_plan.addStorageHolder(table_node->getStorage());
query_plan.addTableLock(table_node->getStorageLock());
}
else if (auto * table_function_node = table_expression->as<TableFunctionNode>())
{
query_plan.addStorageHolder(table_function_node->getStorage());
}
}
} }
} }

View File

@ -6,7 +6,6 @@
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Interpreters/Context_fwd.h> #include <Interpreters/Context_fwd.h>
#include <Interpreters/SubqueryForSet.h>
#include <Analyzer/IQueryTreeNode.h> #include <Analyzer/IQueryTreeNode.h>

View File

@ -6,7 +6,6 @@
#include <Core/NamesAndTypes.h> #include <Core/NamesAndTypes.h>
#include <Interpreters/Context_fwd.h> #include <Interpreters/Context_fwd.h>
#include <Interpreters/SubqueryForSet.h>
#include <Interpreters/Set.h> #include <Interpreters/Set.h>
#include <Analyzer/IQueryTreeNode.h> #include <Analyzer/IQueryTreeNode.h>
@ -46,7 +45,7 @@ public:
/// Get set for key, if no set is registered null is returned /// Get set for key, if no set is registered null is returned
SetPtr getSetOrNull(const SetKey & key) const; SetPtr getSetOrNull(const SetKey & key) const;
/// Get set for key, if no set is registered logical exception is throwed /// Get set for key, if no set is registered logical exception is thrown
SetPtr getSetOrThrow(const SetKey & key) const; SetPtr getSetOrThrow(const SetKey & key) const;
/** Register subquery node for set /** Register subquery node for set

View File

@ -11,7 +11,7 @@ namespace DB
{ {
/** Join clause represent single JOIN ON section clause. /** Join clause represent single JOIN ON section clause.
* Join clause consits of JOIN keys and conditions. * Join clause consists of JOIN keys and conditions.
* *
* JOIN can contain multiple clauses in JOIN ON section. * JOIN can contain multiple clauses in JOIN ON section.
* Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id OR t1.value = t2.value; * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id OR t1.value = t2.value;
@ -34,7 +34,7 @@ namespace DB
* *
* We have 2 streams, left stream and right stream. * We have 2 streams, left stream and right stream.
* We split JOIN ON section expressions actions in two parts left join expression actions and right join expression actions. * We split JOIN ON section expressions actions in two parts left join expression actions and right join expression actions.
* Left join expresion actions must be used to calculate necessary actions for left stream. * Left join expression actions must be used to calculate necessary actions for left stream.
* Right join expression actions must be used to calculate necessary actions for right stream. * Right join expression actions must be used to calculate necessary actions for right stream.
*/ */
class PlannerContext; class PlannerContext;

View File

@ -122,11 +122,8 @@ void CreatingSetsStep::describePipeline(FormatSettings & settings) const
IQueryPlanStep::describePipeline(processors, settings); IQueryPlanStep::describePipeline(processors, settings);
} }
void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context) void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context)
{ {
if (!prepared_sets || prepared_sets->empty())
return;
DataStreams input_streams; DataStreams input_streams;
input_streams.emplace_back(query_plan.getCurrentDataStream()); input_streams.emplace_back(query_plan.getCurrentDataStream());
@ -134,7 +131,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets,
plans.emplace_back(std::make_unique<QueryPlan>(std::move(query_plan))); plans.emplace_back(std::make_unique<QueryPlan>(std::move(query_plan)));
query_plan = QueryPlan(); query_plan = QueryPlan();
for (auto & [description, subquery_for_set] : prepared_sets->detachSubqueries()) for (auto & [description, subquery_for_set] : subqueries_for_sets)
{ {
if (!subquery_for_set.hasSource()) if (!subquery_for_set.hasSource())
continue; continue;
@ -166,4 +163,12 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets,
query_plan.unitePlans(std::move(creating_sets), std::move(plans)); query_plan.unitePlans(std::move(creating_sets), std::move(plans));
} }
void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context)
{
if (!prepared_sets || prepared_sets->empty())
return;
addCreatingSetsStep(query_plan, prepared_sets->detachSubqueries(), context);
}
} }

View File

@ -49,6 +49,8 @@ private:
Processors processors; Processors processors;
}; };
void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::SubqueriesForSets subqueries_for_sets, ContextPtr context);
void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context); void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets, ContextPtr context);
} }