diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 9b8d7cb30ce..8cc49ce0c17 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -51,10 +51,6 @@ set(SRCS ${RDKAFKA_SOURCE_DIR}/snappy.c ${RDKAFKA_SOURCE_DIR}/tinycthread.c ${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c - #${RDKAFKA_SOURCE_DIR}/xxhash.c - #${RDKAFKA_SOURCE_DIR}/lz4.c - #${RDKAFKA_SOURCE_DIR}/lz4frame.c - #${RDKAFKA_SOURCE_DIR}/lz4hc.c ${RDKAFKA_SOURCE_DIR}/rdgz.c ) diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp index ac760269162..8c9f7403898 100644 --- a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.cpp @@ -5,7 +5,7 @@ namespace DB { -FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector & tables_) +FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector & tables_) : tables(tables_) { } @@ -16,13 +16,21 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &) if (!identifier.compound()) { - if (!tables.empty()) - best_table = &tables[0]; + for (const auto & [table, names] : tables) + { + if (std::find(names.begin(), names.end(), identifier.name) != names.end()) + { + // TODO: make sure no collision ever happens + if (!best_table) + best_table = &table; + } + } } else { + // FIXME: make a better matcher using `names`? size_t best_match = 0; - for (const DatabaseAndTableWithAlias & table : tables) + for (const auto & [table, names] : tables) { if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table)) if (match > best_match) diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h index 96e801f7ed2..498ee60ab0b 100644 --- a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -12,10 +13,10 @@ struct FindIdentifierBestTableData using TypeToVisit = ASTIdentifier; using IdentifierWithTable = std::pair; - const std::vector & tables; + const std::vector & tables; std::vector identifier_table; - FindIdentifierBestTableData(const std::vector & tables_); + FindIdentifierBestTableData(const std::vector & tables_); void visit(ASTIdentifier & identifier, ASTPtr &); }; diff --git a/dbms/src/Interpreters/InterpreterExplainQuery.cpp b/dbms/src/Interpreters/InterpreterExplainQuery.cpp index 4bacefe1e0f..be7a592ecb9 100644 --- a/dbms/src/Interpreters/InterpreterExplainQuery.cpp +++ b/dbms/src/Interpreters/InterpreterExplainQuery.cpp @@ -1,13 +1,15 @@ -#include - -#include -#include -#include -#include -#include #include + +#include +#include +#include +#include #include #include +#include +#include + +#include namespace DB @@ -26,7 +28,7 @@ Block InterpreterExplainQuery::getSampleBlock() Block block; ColumnWithTypeAndName col; - col.name = "ast"; + col.name = "explain"; col.type = std::make_shared(); col.column = col.type->createColumn(); block.insert(col); @@ -38,12 +40,21 @@ Block InterpreterExplainQuery::getSampleBlock() BlockInputStreamPtr InterpreterExplainQuery::executeImpl() { const ASTExplainQuery & ast = typeid_cast(*query); - - std::stringstream ss; - dumpAST(ast, ss); - Block sample_block = getSampleBlock(); MutableColumns res_columns = sample_block.cloneEmptyColumns(); + + std::stringstream ss; + + if (ast.getKind() == ASTExplainQuery::ParsedAST) + { + dumpAST(ast, ss); + } + else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax) + { + InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true); + interpreter.getQuery()->format(IAST::FormatSettings(ss, false)); + } + res_columns[0]->insert(ss.str()); return std::make_shared(sample_block.cloneWithColumns(std::move(res_columns))); diff --git a/dbms/src/Interpreters/InterpreterExplainQuery.h b/dbms/src/Interpreters/InterpreterExplainQuery.h index 4d366a9d56e..4db796ad014 100644 --- a/dbms/src/Interpreters/InterpreterExplainQuery.h +++ b/dbms/src/Interpreters/InterpreterExplainQuery.h @@ -1,5 +1,6 @@ #pragma once +#include #include @@ -14,8 +15,8 @@ using ASTPtr = std::shared_ptr; class InterpreterExplainQuery : public IInterpreter { public: - InterpreterExplainQuery(const ASTPtr & query_, const Context &) - : query(query_) + InterpreterExplainQuery(const ASTPtr & query_, const Context & context_) + : query(query_), context(context_) {} BlockIO execute() override; @@ -24,6 +25,7 @@ public: private: ASTPtr query; + Context context; BlockInputStreamPtr executeImpl(); }; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 87ab02af45c..f2a76a20d0a 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -80,8 +80,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Names & required_result_column_names, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, - bool only_analyze_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_) + bool only_analyze_, + bool modify_inplace) + : InterpreterSelectQuery( + query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace) { } @@ -90,8 +92,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const BlockInputStreamPtr & input_, QueryProcessingStage::Enum to_stage_, - bool only_analyze_) - : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_) + bool only_analyze_, + bool modify_inplace) + : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace) { } @@ -100,8 +103,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const StoragePtr & storage_, QueryProcessingStage::Enum to_stage_, - bool only_analyze_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_) + bool only_analyze_, + bool modify_inplace) + : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace) { } @@ -131,8 +135,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Names & required_result_column_names, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, - bool only_analyze_) - : query_ptr(query_ptr_->clone()) /// Note: the query is cloned because it will be modified during analysis. + bool only_analyze_, + bool modify_inplace) + /// NOTE: the query almost always should be cloned because it will be modified during analysis. + : query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone()) , query(typeid_cast(*query_ptr)) , context(context_) , to_stage(to_stage_) @@ -170,7 +176,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Read from subquery. interpreter_subquery = std::make_unique( - table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze); + table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace); source_header = interpreter_subquery->getSampleBlock(); } @@ -217,16 +223,23 @@ InterpreterSelectQuery::InterpreterSelectQuery( for (const auto & it : query_analyzer->getExternalTables()) if (!context.tryGetExternalTable(it.first)) context.addExternalTable(it.first, it.second); + } + if (!only_analyze || modify_inplace) + { if (query_analyzer->isRewriteSubqueriesPredicate()) { /// remake interpreter_subquery when PredicateOptimizer is rewrite subqueries and main table is subquery if (is_subquery) interpreter_subquery = std::make_unique( - table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, - only_analyze); + table_expression, + getSubqueryContext(context), + required_columns, + QueryProcessingStage::Complete, + subquery_depth + 1, + only_analyze, + modify_inplace); } - } if (interpreter_subquery) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index df1999f6a82..de5a11e727b 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -52,7 +52,8 @@ public: const Names & required_result_column_names = Names{}, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0, - bool only_analyze_ = false); + bool only_analyze_ = false, + bool modify_inplace = false); /// Read data not from the table specified in the query, but from the prepared source `input`. InterpreterSelectQuery( @@ -60,7 +61,8 @@ public: const Context & context_, const BlockInputStreamPtr & input_, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - bool only_analyze_ = false); + bool only_analyze_ = false, + bool modify_inplace = false); /// Read data not from the table specified in the query, but from the specified `storage_`. InterpreterSelectQuery( @@ -68,7 +70,8 @@ public: const Context & context_, const StoragePtr & storage_, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - bool only_analyze_ = false); + bool only_analyze_ = false, + bool modify_inplace = false); ~InterpreterSelectQuery() override; @@ -82,6 +85,8 @@ public: void ignoreWithTotals(); + ASTPtr getQuery() const { return query_ptr; } + private: InterpreterSelectQuery( const ASTPtr & query_ptr_, @@ -91,7 +96,8 @@ private: const Names & required_result_column_names, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, - bool only_analyze_); + bool only_analyze_, + bool modify_inplace); struct Pipeline diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 7bc99ae8b9a..1dc5419223e 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -28,7 +29,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const Names & required_result_column_names, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, - bool only_analyze) + bool only_analyze, + bool modify_inplace) : query_ptr(query_ptr_), context(context_), to_stage(to_stage_), @@ -81,12 +83,17 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (size_t query_num = 0; query_num < num_selects; ++query_num) { - const Names & current_required_result_column_names = query_num == 0 - ? required_result_column_names - : required_result_column_names_for_other_selects[query_num]; + const Names & current_required_result_column_names + = query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num]; nested_interpreters.emplace_back(std::make_unique( - ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze)); + ast.list_of_selects->children.at(query_num), + context, + current_required_result_column_names, + to_stage, + subquery_depth, + only_analyze, + modify_inplace)); } /// Determine structure of the result. diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index cde35e905f1..44131a9d100 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -22,7 +22,8 @@ public: const Names & required_result_column_names = Names{}, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0, - bool only_analyze = false); + bool only_analyze = false, + bool modify_inplace = false); ~InterpreterSelectWithUnionQuery() override; @@ -39,6 +40,8 @@ public: void ignoreWithTotals(); + ASTPtr getQuery() const { return query_ptr; } + private: ASTPtr query_ptr; Context context; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 11bd6f564b7..612ea231bdd 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -59,17 +59,17 @@ bool PredicateExpressionsOptimizer::optimize() is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE); is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE); } + return is_rewrite_subqueries; } bool PredicateExpressionsOptimizer::optimizeImpl( - ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind) + ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind) { /// split predicate with `and` std::vector outer_predicate_expressions = splitConjunctionPredicate(outer_expression); - std::vector database_and_table_with_aliases = - getDatabaseAndTables(*ast_select, context.getCurrentDatabase()); + std::vector tables_with_columns = getDatabaseAndTablesWithColumnNames(*ast_select, context); bool is_rewrite_subquery = false; for (auto & outer_predicate : outer_predicate_expressions) @@ -77,7 +77,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl( if (isArrayJoinFunction(outer_predicate)) continue; - auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, database_and_table_with_aliases); + auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, tables_with_columns); /// TODO: remove origin expression for (const auto & [subquery, projection_columns] : subqueries_projection_columns) @@ -92,7 +92,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl( cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate std::vector inner_predicate_dependencies = - getDependenciesAndQualifiers(inner_predicate, database_and_table_with_aliases); + getDependenciesAndQualifiers(inner_predicate, tables_with_columns); setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies); @@ -169,7 +169,7 @@ std::vector PredicateExpressionsOptimizer::splitConjunctionPredicate(AST } std::vector -PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector & tables) +PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector & tables) { FindIdentifierBestTableVisitor::Data find_data(tables); FindIdentifierBestTableVisitor(find_data).visit(expression); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index fa9913170bf..9281247dd4e 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -2,6 +2,8 @@ #include +#include + namespace DB { @@ -70,11 +72,11 @@ private: std::vector splitConjunctionPredicate(ASTPtr & predicate_expression); std::vector getDependenciesAndQualifiers(ASTPtr & expression, - std::vector & tables_with_aliases); + std::vector & tables_with_aliases); bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery); - bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind); + bool optimizeImpl(ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind); bool allowPushDown(const ASTSelectQuery * subquery); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 58a29097072..d3b42c57926 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -13,23 +13,26 @@ #include #include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include #include #include -#include #include -#include #include +#include +#include #include + namespace DB { @@ -107,7 +110,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query visitor.visit(query); } - bool hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) @@ -591,8 +593,30 @@ Names qualifyOccupiedNames(NamesAndTypesList & columns, const NameSet & source_c return originals; } +void replaceJoinedTable(const ASTTablesInSelectQueryElement* join) +{ + if (!join || !join->table_expression) + return; + + auto & table_expr = static_cast(*join->table_expression.get()); + if (table_expr.database_and_table_name) + { + auto & table_id = typeid_cast(*table_expr.database_and_table_name.get()); + String expr = "(select * from " + table_id.name + ") as " + table_id.shortName(); + + // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b", + // we can't replace aliased tables. + // FIXME: long table names include database name, which we can't save within alias. + if (table_id.alias.empty() && table_id.isShort()) + { + ParserTableExpression parser; + table_expr = static_cast(*parseQuery(parser, expr, 0)); + } + } } +} // namespace + SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( ASTPtr & query, @@ -628,6 +652,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( { if (const ASTTablesInSelectQueryElement * node = select_query->join()) { + replaceJoinedTable(node); + const auto & joined_expression = static_cast(*node->table_expression); DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase()); diff --git a/dbms/src/Parsers/ASTExplainQuery.h b/dbms/src/Parsers/ASTExplainQuery.h index a1eb9feecd4..5ebd02b85f8 100644 --- a/dbms/src/Parsers/ASTExplainQuery.h +++ b/dbms/src/Parsers/ASTExplainQuery.h @@ -14,13 +14,15 @@ public: enum ExplainKind { ParsedAST, + AnalyzedSyntax, }; - ASTExplainQuery(ExplainKind kind_ = ParsedAST) + ASTExplainQuery(ExplainKind kind_) : kind(kind_) {} String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } + ExplainKind getKind() const { return kind; } ASTPtr clone() const override { return std::make_shared(*this); } protected: @@ -37,7 +39,9 @@ private: switch (kind) { case ParsedAST: return "ParsedAST"; + case AnalyzedSyntax: return "AnalyzedSyntax"; } + __builtin_unreachable(); } }; diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index d5266ae4b4a..a63324f8b10 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -23,7 +23,7 @@ public: bool distinct = false; ASTPtr with_expression_list; ASTPtr select_expression_list; - ASTPtr tables; + ASTPtr tables; // pointer to TablesInSelectQuery ASTPtr prewhere_expression; ASTPtr where_expression; ASTPtr group_expression_list; diff --git a/dbms/src/Parsers/DumpASTNode.h b/dbms/src/Parsers/DumpASTNode.h index 8eb1342ffe1..6b945cd5c8d 100644 --- a/dbms/src/Parsers/DumpASTNode.h +++ b/dbms/src/Parsers/DumpASTNode.h @@ -68,9 +68,9 @@ private: { (*ostr) << nodeId(); - String aslias = ast.tryGetAlias(); - if (!aslias.empty()) - print("alias", aslias, " "); + String alias = ast.tryGetAlias(); + if (!alias.empty()) + print("alias", alias, " "); if (!ast.children.empty()) print("children", ast.children.size(), " "); diff --git a/dbms/src/Parsers/ParserQueryWithOutput.cpp b/dbms/src/Parsers/ParserQueryWithOutput.cpp index 91e320deeeb..d1679067854 100644 --- a/dbms/src/Parsers/ParserQueryWithOutput.cpp +++ b/dbms/src/Parsers/ParserQueryWithOutput.cpp @@ -36,11 +36,16 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ASTPtr query; ParserKeyword s_ast("AST"); + ParserKeyword s_analyze("ANALYZE"); bool explain_ast = false; + bool analyze_syntax = false; if (enable_explain && s_ast.ignore(pos, expected)) explain_ast = true; + if (enable_explain && s_analyze.ignore(pos, expected)) + analyze_syntax = true; + bool parsed = select_p.parse(pos, query, expected) || show_tables_p.parse(pos, query, expected) || table_p.parse(pos, query, expected) @@ -94,7 +99,12 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (explain_ast) { - node = std::make_shared(); + node = std::make_shared(ASTExplainQuery::ParsedAST); + node->children.push_back(query); + } + else if (analyze_syntax) + { + node = std::make_shared(ASTExplainQuery::AnalyzedSyntax); node->children.push_back(query); } else diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index c66eb51317c..1cf4882ddc8 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -277,11 +277,12 @@ def main(args): report_testcase.append(skipped) print("{0} - no reference file".format(MSG_UNKNOWN)) else: - result_is_different = subprocess.call(['cmp', '-s', reference_file, stdout_file], stdout = PIPE) + result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout = PIPE) if result_is_different: - (diff, _) = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate() + diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0] diff = unicode(diff, errors='replace', encoding='utf-8') + cat = Popen(['cat', '-A'], stdin=PIPE, stdout=PIPE).communicate(input=diff)[0] failure = et.Element("failure", attrib = {"message": "result differs with reference"}) report_testcase.append(failure) @@ -294,7 +295,7 @@ def main(args): report_testcase.append(stdout_element) failures += 1 - print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff.encode('utf-8'))) + print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, cat.encode('utf-8'))) else: passed_total += 1 failures_chain = 0 diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index bc28d4efb7e..ff97111f6b7 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -1,36 +1,55 @@ --------Not need optimize predicate, but it works.------- +-------No need for predicate optimization, but still works------- 1 1 1 +2000-01-01 1 test string 1 1 -------Need push down------- +SELECT dummy\nFROM system.one \nANY LEFT JOIN \n(\n SELECT 0 AS dummy\n WHERE 1\n) USING (dummy)\nWHERE 1 0 +SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n) \nWHERE value = \'1\' 1 +SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n) \nWHERE id = 1 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 1 +SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1 1 +SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n) \nWHERE subquery = 1 1 1 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test.test \n HAVING a = 3\n) \nWHERE a = 3 3 3 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test.test \n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n) \nWHERE id = 1 +2000-01-01 1 test string 1 1 +SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test.test AS table_alias \n HAVING b = 3\n) AS outer_table_alias \nWHERE outer_table_alias.b = 3 3 3 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nWHERE id = 1 2000-01-01 1 test string 1 1 -3 3 --------Force push down------- -2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) AS b \n WHERE id = 1\n) \nWHERE id = 1 2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nWHERE id = 1 2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1 2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) AS b \nWHERE b.id = 1 2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) AS a \n WHERE id = 1\n) AS b \nWHERE b.id = 1 2000-01-01 1 test string 1 1 +SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test.test \n WHERE id = 1\n GROUP BY \n id, \n date\n) \nWHERE id = 1 1 2000-01-01 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE id = 1\n) USING (id)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -2000-01-01 1 test string 1 1 -1 2000-01-01 1 test string 1 1 -2000-01-01 1 test string 1 1 -2000-01-01 2 test string 2 2 -1 -1 --------Push to having expression, need check.------- --------Compatibility test------- +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test.test USING (id)\nWHERE value = 1 +1 2000-01-01 test string 1 1 +SELECT b.value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test.test AS b USING (id)\nWHERE value = 1 +1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) \n ANY LEFT JOIN \n (\n SELECT *\n FROM test.test \n WHERE id = 1\n ) USING (id)\n WHERE id = 1\n) \nWHERE id = 1 +2000-01-01 1 test string 1 1 +SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE id = 1\n) AS b USING (id)\nWHERE b.id = 1 +2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 +SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers \n LIMIT 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE date = toDate(\'2000-01-01\')\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') 1 2000-01-01 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index 495c367e3bd..4d3187c6338 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -1,10 +1,8 @@ SET send_logs_level = 'none'; -DROP TABLE IF EXISTS test.perf; DROP TABLE IF EXISTS test.test; DROP TABLE IF EXISTS test.test_view; -CREATE TABLE test.perf(site String, user_id UInt64, z Float64)ENGINE = Log; CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192); CREATE VIEW test.test_view AS SELECT * FROM test.test; @@ -12,71 +10,103 @@ INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1); INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2); SET enable_optimize_predicate_expression = 1; +SET enable_debug_queries = 1; -SELECT '-------Not need optimize predicate, but it works.-------'; +SELECT '-------No need for predicate optimization, but still works-------'; SELECT 1; SELECT 1 AS id WHERE id = 1; SELECT arrayJoin([1,2,3]) AS id WHERE id = 1; -SELECT * FROM (SELECT perf_1.z AS z_1 FROM test.perf AS perf_1); +SELECT * FROM test.test WHERE id = 1; SELECT '-------Need push down-------'; -SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1; -SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1'; -SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1; -SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; -SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; -SELECT * FROM (SELECT perf_1.z AS z_1 FROM test.perf AS perf_1) WHERE z_1 = 1; +-- Optimize predicate expressions without tables +ANALYZE SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1; +SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1; + +ANALYZE SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1'; +SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1'; + +ANALYZE SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1; +SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1; + +ANALYZE SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; +SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; + +ANALYZE SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; +SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1; + +ANALYZE SELECT * FROM (SELECT 1 AS id, (SELECT 1) as subquery) WHERE subquery = 1; SELECT * FROM (SELECT 1 AS id, (SELECT 1) as subquery) WHERE subquery = 1; + +-- Optimize predicate expressions using tables +ANALYZE SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; -SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; + +ANALYZE SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1; SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1; + +ANALYZE SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -SELECT '-------Force push down-------'; -SET force_primary_key = 1; - -- Optimize predicate expression with asterisk +ANALYZE SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; + -- Optimize predicate expression with asterisk and nested subquery +ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1; SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1; + -- Optimize predicate expression with qualified asterisk +ANALYZE SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1; SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1; + -- Optimize predicate expression without asterisk +ANALYZE SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1; SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1; + -- Optimize predicate expression without asterisk and contains nested subquery +ANALYZE SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1; SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1; + -- Optimize predicate expression with qualified +ANALYZE SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1; SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1; + -- Optimize predicate expression with qualified and nested subquery +ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1; SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1; + -- Optimize predicate expression with aggregate function +ANALYZE SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1; SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1; -- Optimize predicate expression with union all query +ANALYZE SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1; SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1; + -- Optimize predicate expression with join query +ANALYZE SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1; SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1; + +ANALYZE SELECT * FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test USING id WHERE value = 1; +SELECT * FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test USING id WHERE value = 1; + +-- FIXME: no support for aliased tables for now. +ANALYZE SELECT b.value FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test AS b USING id WHERE value = 1; +SELECT b.value FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test AS b USING id WHERE value = 1; + -- Optimize predicate expression with join and nested subquery +ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1; SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1; + -- Optimize predicate expression with join query and qualified -SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.id = 1; +ANALYZE SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING id WHERE b.id = 1; +SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING id WHERE b.id = 1; --- Optimize predicate expression with view -SELECT * FROM test.test_view WHERE id = 1; -SELECT * FROM test.test_view WHERE id = 2; -SELECT id FROM test.test_view WHERE id = 1; -SELECT s.id FROM test.test_view AS s WHERE s.id = 1; - -SELECT '-------Push to having expression, need check.-------'; -SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 } -SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; -- { serverError 277 } -SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; -- { serverError 277 } -SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -- { serverError 277 } - -SELECT '-------Compatibility test-------'; +-- Compatibility test +ANALYZE SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01'); SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01'); -DROP TABLE IF EXISTS test.perf; DROP TABLE IF EXISTS test.test; DROP TABLE IF EXISTS test.test_view; diff --git a/dbms/tests/queries/0_stateless/00908_analyze_query.reference b/dbms/tests/queries/0_stateless/00908_analyze_query.reference new file mode 100644 index 00000000000..b784efbad42 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00908_analyze_query.reference @@ -0,0 +1 @@ +SELECT \n a, \n b\nFROM test.a diff --git a/dbms/tests/queries/0_stateless/00908_analyze_query.sql b/dbms/tests/queries/0_stateless/00908_analyze_query.sql new file mode 100644 index 00000000000..116b4bdc72d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00908_analyze_query.sql @@ -0,0 +1,8 @@ +set enable_debug_queries = 1; + +DROP TABLE IF EXISTS test.a; +CREATE TABLE test.a (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a; + +ANALYZE SELECT * FROM test.a; + +DROP TABLE test.a; diff --git a/dbms/tests/queries/bugs/00597_push_down_predicate.sql b/dbms/tests/queries/bugs/00597_push_down_predicate.sql new file mode 100644 index 00000000000..0de9522ab50 --- /dev/null +++ b/dbms/tests/queries/bugs/00597_push_down_predicate.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test.test; +DROP TABLE IF EXISTS test.test_view; + +CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192); +CREATE VIEW test.test_view AS SELECT * FROM test.test; + +SET enable_optimize_predicate_expression = 1; +SET enable_debug_queries = 1; + +-- Optimize predicate expression with view +-- TODO: simple view is not replaced with subquery inside syntax analyzer +ANALYZE SELECT * FROM test.test_view WHERE id = 1; +ANALYZE SELECT * FROM test.test_view WHERE id = 2; +ANALYZE SELECT id FROM test.test_view WHERE id = 1; +ANALYZE SELECT s.id FROM test.test_view AS s WHERE s.id = 1; + +-- TODO: this query shouldn't work, because the name `toUInt64(sum(id))` is undefined for user +SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3;