Merge pull request #4387 from abyss7/CLICKHOUSE-4268

Improve push-down logic for joined statements
This commit is contained in:
alexey-milovidov 2019-03-02 04:19:50 +03:00 committed by GitHub
commit ee903886e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 277 additions and 111 deletions

View File

@ -51,10 +51,6 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/snappy.c
${RDKAFKA_SOURCE_DIR}/tinycthread.c
${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
#${RDKAFKA_SOURCE_DIR}/xxhash.c
#${RDKAFKA_SOURCE_DIR}/lz4.c
#${RDKAFKA_SOURCE_DIR}/lz4frame.c
#${RDKAFKA_SOURCE_DIR}/lz4hc.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
)

View File

@ -5,7 +5,7 @@
namespace DB
{
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_)
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_)
: tables(tables_)
{
}
@ -16,13 +16,21 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)
if (!identifier.compound())
{
if (!tables.empty())
best_table = &tables[0];
for (const auto & [table, names] : tables)
{
if (std::find(names.begin(), names.end(), identifier.name) != names.end())
{
// TODO: make sure no collision ever happens
if (!best_table)
best_table = &table;
}
}
}
else
{
// FIXME: make a better matcher using `names`?
size_t best_match = 0;
for (const DatabaseAndTableWithAlias & table : tables)
for (const auto & [table, names] : tables)
{
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table))
if (match > best_match)

View File

@ -3,6 +3,7 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
@ -12,10 +13,10 @@ struct FindIdentifierBestTableData
using TypeToVisit = ASTIdentifier;
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;
const std::vector<DatabaseAndTableWithAlias> & tables;
const std::vector<TableWithColumnNames> & tables;
std::vector<IdentifierWithTable> identifier_table;
FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);
FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_);
void visit(ASTIdentifier & identifier, ASTPtr &);
};

View File

@ -1,13 +1,15 @@
#include <sstream>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/BlockIO.h>
#include <DataTypes/DataTypeString.h>
#include <Parsers/queryToString.h>
#include <Common/typeid_cast.h>
#include <Interpreters/InterpreterExplainQuery.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/queryToString.h>
#include <Common/typeid_cast.h>
#include <sstream>
namespace DB
@ -26,7 +28,7 @@ Block InterpreterExplainQuery::getSampleBlock()
Block block;
ColumnWithTypeAndName col;
col.name = "ast";
col.name = "explain";
col.type = std::make_shared<DataTypeString>();
col.column = col.type->createColumn();
block.insert(col);
@ -38,12 +40,21 @@ Block InterpreterExplainQuery::getSampleBlock()
BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
{
const ASTExplainQuery & ast = typeid_cast<const ASTExplainQuery &>(*query);
std::stringstream ss;
dumpAST(ast, ss);
Block sample_block = getSampleBlock();
MutableColumns res_columns = sample_block.cloneEmptyColumns();
std::stringstream ss;
if (ast.getKind() == ASTExplainQuery::ParsedAST)
{
dumpAST(ast, ss);
}
else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
{
InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true);
interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
}
res_columns[0]->insert(ss.str());
return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));

View File

@ -1,5 +1,6 @@
#pragma once
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
@ -14,8 +15,8 @@ using ASTPtr = std::shared_ptr<IAST>;
class InterpreterExplainQuery : public IInterpreter
{
public:
InterpreterExplainQuery(const ASTPtr & query_, const Context &)
: query(query_)
InterpreterExplainQuery(const ASTPtr & query_, const Context & context_)
: query(query_), context(context_)
{}
BlockIO execute() override;
@ -24,6 +25,7 @@ public:
private:
ASTPtr query;
Context context;
BlockInputStreamPtr executeImpl();
};

View File

@ -80,8 +80,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_)
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(
query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace)
{
}
@ -90,8 +92,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Context & context_,
const BlockInputStreamPtr & input_,
QueryProcessingStage::Enum to_stage_,
bool only_analyze_)
: InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_)
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
{
}
@ -100,8 +103,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Context & context_,
const StoragePtr & storage_,
QueryProcessingStage::Enum to_stage_,
bool only_analyze_)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_)
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
{
}
@ -131,8 +135,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_)
: query_ptr(query_ptr_->clone()) /// Note: the query is cloned because it will be modified during analysis.
bool only_analyze_,
bool modify_inplace)
/// NOTE: the query almost always should be cloned because it will be modified during analysis.
: query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())
, query(typeid_cast<ASTSelectQuery &>(*query_ptr))
, context(context_)
, to_stage(to_stage_)
@ -170,7 +176,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze);
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace);
source_header = interpreter_subquery->getSampleBlock();
}
@ -217,16 +223,23 @@ InterpreterSelectQuery::InterpreterSelectQuery(
for (const auto & it : query_analyzer->getExternalTables())
if (!context.tryGetExternalTable(it.first))
context.addExternalTable(it.first, it.second);
}
if (!only_analyze || modify_inplace)
{
if (query_analyzer->isRewriteSubqueriesPredicate())
{
/// remake interpreter_subquery when PredicateOptimizer is rewrite subqueries and main table is subquery
if (is_subquery)
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1,
only_analyze);
table_expression,
getSubqueryContext(context),
required_columns,
QueryProcessingStage::Complete,
subquery_depth + 1,
only_analyze,
modify_inplace);
}
}
if (interpreter_subquery)

View File

@ -52,7 +52,8 @@ public:
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
bool only_analyze_ = false);
bool only_analyze_ = false,
bool modify_inplace = false);
/// Read data not from the table specified in the query, but from the prepared source `input`.
InterpreterSelectQuery(
@ -60,7 +61,8 @@ public:
const Context & context_,
const BlockInputStreamPtr & input_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
bool only_analyze_ = false);
bool only_analyze_ = false,
bool modify_inplace = false);
/// Read data not from the table specified in the query, but from the specified `storage_`.
InterpreterSelectQuery(
@ -68,7 +70,8 @@ public:
const Context & context_,
const StoragePtr & storage_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
bool only_analyze_ = false);
bool only_analyze_ = false,
bool modify_inplace = false);
~InterpreterSelectQuery() override;
@ -82,6 +85,8 @@ public:
void ignoreWithTotals();
ASTPtr getQuery() const { return query_ptr; }
private:
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
@ -91,7 +96,8 @@ private:
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_);
bool only_analyze_,
bool modify_inplace);
struct Pipeline

View File

@ -10,6 +10,7 @@
#include <Columns/ColumnConst.h>
#include <Common/typeid_cast.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTExpressionList.h>
namespace DB
@ -28,7 +29,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze)
bool only_analyze,
bool modify_inplace)
: query_ptr(query_ptr_),
context(context_),
to_stage(to_stage_),
@ -81,12 +83,17 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
for (size_t query_num = 0; query_num < num_selects; ++query_num)
{
const Names & current_required_result_column_names = query_num == 0
? required_result_column_names
: required_result_column_names_for_other_selects[query_num];
const Names & current_required_result_column_names
= query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num];
nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze));
ast.list_of_selects->children.at(query_num),
context,
current_required_result_column_names,
to_stage,
subquery_depth,
only_analyze,
modify_inplace));
}
/// Determine structure of the result.

View File

@ -22,7 +22,8 @@ public:
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
bool only_analyze = false);
bool only_analyze = false,
bool modify_inplace = false);
~InterpreterSelectWithUnionQuery() override;
@ -39,6 +40,8 @@ public:
void ignoreWithTotals();
ASTPtr getQuery() const { return query_ptr; }
private:
ASTPtr query_ptr;
Context context;

View File

@ -59,17 +59,17 @@ bool PredicateExpressionsOptimizer::optimize()
is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE);
is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE);
}
return is_rewrite_subqueries;
}
bool PredicateExpressionsOptimizer::optimizeImpl(
ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
{
/// split predicate with `and`
std::vector<ASTPtr> outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
std::vector<DatabaseAndTableWithAlias> database_and_table_with_aliases =
getDatabaseAndTables(*ast_select, context.getCurrentDatabase());
std::vector<TableWithColumnNames> tables_with_columns = getDatabaseAndTablesWithColumnNames(*ast_select, context);
bool is_rewrite_subquery = false;
for (auto & outer_predicate : outer_predicate_expressions)
@ -77,7 +77,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
if (isArrayJoinFunction(outer_predicate))
continue;
auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, database_and_table_with_aliases);
auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, tables_with_columns);
/// TODO: remove origin expression
for (const auto & [subquery, projection_columns] : subqueries_projection_columns)
@ -92,7 +92,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate
std::vector<IdentifierWithQualifier> inner_predicate_dependencies =
getDependenciesAndQualifiers(inner_predicate, database_and_table_with_aliases);
getDependenciesAndQualifiers(inner_predicate, tables_with_columns);
setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies);
@ -169,7 +169,7 @@ std::vector<ASTPtr> PredicateExpressionsOptimizer::splitConjunctionPredicate(AST
}
std::vector<PredicateExpressionsOptimizer::IdentifierWithQualifier>
PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<DatabaseAndTableWithAlias> & tables)
PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<TableWithColumnNames> & tables)
{
FindIdentifierBestTableVisitor::Data find_data(tables);
FindIdentifierBestTableVisitor(find_data).visit(expression);

View File

@ -2,6 +2,8 @@
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <map>
namespace DB
{
@ -70,11 +72,11 @@ private:
std::vector<ASTPtr> splitConjunctionPredicate(ASTPtr & predicate_expression);
std::vector<IdentifierWithQualifier> getDependenciesAndQualifiers(ASTPtr & expression,
std::vector<DatabaseAndTableWithAlias> & tables_with_aliases);
std::vector<TableWithColumnNames> & tables_with_aliases);
bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery);
bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
bool optimizeImpl(ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
bool allowPushDown(const ASTSelectQuery * subquery);

View File

@ -13,23 +13,26 @@
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <DataTypes/NestedUtils.h>
#include <Common/typeid_cast.h>
#include <Core/NamesAndTypes.h>
#include <Storages/IStorage.h>
#include <IO/WriteHelpers.h>
#include <Storages/IStorage.h>
#include <Common/typeid_cast.h>
#include <functional>
namespace DB
{
@ -107,7 +110,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
visitor.visit(query);
}
bool hasArrayJoin(const ASTPtr & ast)
{
if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
@ -591,8 +593,30 @@ Names qualifyOccupiedNames(NamesAndTypesList & columns, const NameSet & source_c
return originals;
}
void replaceJoinedTable(const ASTTablesInSelectQueryElement* join)
{
if (!join || !join->table_expression)
return;
auto & table_expr = static_cast<ASTTableExpression &>(*join->table_expression.get());
if (table_expr.database_and_table_name)
{
auto & table_id = typeid_cast<ASTIdentifier &>(*table_expr.database_and_table_name.get());
String expr = "(select * from " + table_id.name + ") as " + table_id.shortName();
// FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b",
// we can't replace aliased tables.
// FIXME: long table names include database name, which we can't save within alias.
if (table_id.alias.empty() && table_id.isShort())
{
ParserTableExpression parser;
table_expr = static_cast<ASTTableExpression &>(*parseQuery(parser, expr, 0));
}
}
}
} // namespace
SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
ASTPtr & query,
@ -628,6 +652,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
{
if (const ASTTablesInSelectQueryElement * node = select_query->join())
{
replaceJoinedTable(node);
const auto & joined_expression = static_cast<const ASTTableExpression &>(*node->table_expression);
DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase());

View File

@ -14,13 +14,15 @@ public:
enum ExplainKind
{
ParsedAST,
AnalyzedSyntax,
};
ASTExplainQuery(ExplainKind kind_ = ParsedAST)
ASTExplainQuery(ExplainKind kind_)
: kind(kind_)
{}
String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
ExplainKind getKind() const { return kind; }
ASTPtr clone() const override { return std::make_shared<ASTExplainQuery>(*this); }
protected:
@ -37,7 +39,9 @@ private:
switch (kind)
{
case ParsedAST: return "ParsedAST";
case AnalyzedSyntax: return "AnalyzedSyntax";
}
__builtin_unreachable();
}
};

View File

@ -23,7 +23,7 @@ public:
bool distinct = false;
ASTPtr with_expression_list;
ASTPtr select_expression_list;
ASTPtr tables;
ASTPtr tables; // pointer to TablesInSelectQuery
ASTPtr prewhere_expression;
ASTPtr where_expression;
ASTPtr group_expression_list;

View File

@ -68,9 +68,9 @@ private:
{
(*ostr) << nodeId();
String aslias = ast.tryGetAlias();
if (!aslias.empty())
print("alias", aslias, " ");
String alias = ast.tryGetAlias();
if (!alias.empty())
print("alias", alias, " ");
if (!ast.children.empty())
print("children", ast.children.size(), " ");

View File

@ -36,11 +36,16 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
ASTPtr query;
ParserKeyword s_ast("AST");
ParserKeyword s_analyze("ANALYZE");
bool explain_ast = false;
bool analyze_syntax = false;
if (enable_explain && s_ast.ignore(pos, expected))
explain_ast = true;
if (enable_explain && s_analyze.ignore(pos, expected))
analyze_syntax = true;
bool parsed = select_p.parse(pos, query, expected)
|| show_tables_p.parse(pos, query, expected)
|| table_p.parse(pos, query, expected)
@ -94,7 +99,12 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (explain_ast)
{
node = std::make_shared<ASTExplainQuery>();
node = std::make_shared<ASTExplainQuery>(ASTExplainQuery::ParsedAST);
node->children.push_back(query);
}
else if (analyze_syntax)
{
node = std::make_shared<ASTExplainQuery>(ASTExplainQuery::AnalyzedSyntax);
node->children.push_back(query);
}
else

View File

@ -277,11 +277,12 @@ def main(args):
report_testcase.append(skipped)
print("{0} - no reference file".format(MSG_UNKNOWN))
else:
result_is_different = subprocess.call(['cmp', '-s', reference_file, stdout_file], stdout = PIPE)
result_is_different = subprocess.call(['diff', '-q', reference_file, stdout_file], stdout = PIPE)
if result_is_different:
(diff, _) = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()
diff = Popen(['diff', '--unified', reference_file, stdout_file], stdout = PIPE).communicate()[0]
diff = unicode(diff, errors='replace', encoding='utf-8')
cat = Popen(['cat', '-A'], stdin=PIPE, stdout=PIPE).communicate(input=diff)[0]
failure = et.Element("failure", attrib = {"message": "result differs with reference"})
report_testcase.append(failure)
@ -294,7 +295,7 @@ def main(args):
report_testcase.append(stdout_element)
failures += 1
print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, diff.encode('utf-8')))
print("{0} - result differs with reference:\n{1}".format(MSG_FAIL, cat.encode('utf-8')))
else:
passed_total += 1
failures_chain = 0

View File

@ -1,36 +1,55 @@
-------Not need optimize predicate, but it works.-------
-------No need for predicate optimization, but still works-------
1
1
1
2000-01-01 1 test string 1 1
-------Need push down-------
SELECT dummy\nFROM system.one \nANY LEFT JOIN \n(\n SELECT 0 AS dummy\n WHERE 1\n) USING (dummy)\nWHERE 1
0
SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n WHERE toString(value) = \'1\'\n) \nWHERE value = \'1\'
1
SELECT id\nFROM \n(\n SELECT 1 AS id\n WHERE id = 1\n UNION ALL\n SELECT 2 AS `2`\n WHERE `2` = 1\n) \nWHERE id = 1
1
SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1
1
SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n) \nWHERE id = 1
1
SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n WHERE subquery = 1\n) \nWHERE subquery = 1
1 1
SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test.test \n HAVING a = 3\n) \nWHERE a = 3
3 3
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test.test \n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test.test AS table_alias \n HAVING b = 3\n) AS outer_table_alias \nWHERE outer_table_alias.b = 3
3 3
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
3 3
-------Force push down-------
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) AS b \n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) \n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) AS b \nWHERE b.id = 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) AS a \n WHERE id = 1\n) AS b \nWHERE b.id = 1
2000-01-01 1 test string 1 1
SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test.test \n WHERE id = 1\n GROUP BY \n id, \n date\n) \nWHERE id = 1
1 2000-01-01 1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE id = 1\n) USING (id)\nWHERE id = 1
2000-01-01 1 test string 1 1 2000-01-01 test string 1 1
2000-01-01 1 test string 1 1
1 2000-01-01 1 test string 1 1
2000-01-01 1 test string 1 1
2000-01-01 2 test string 2 2
1
1
-------Push to having expression, need check.-------
-------Compatibility test-------
SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test.test USING (id)\nWHERE value = 1
1 2000-01-01 test string 1 1
SELECT b.value\nFROM \n(\n SELECT toInt8(1) AS id\n) \nANY LEFT JOIN test.test AS b USING (id)\nWHERE value = 1
1
SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n WHERE id = 1\n ) \n ANY LEFT JOIN \n (\n SELECT *\n FROM test.test \n WHERE id = 1\n ) USING (id)\n WHERE id = 1\n) \nWHERE id = 1
2000-01-01 1 test string 1 1
SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test.test \n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE id = 1\n) AS b USING (id)\nWHERE b.id = 1
2000-01-01 1 test string 1 1 2000-01-01 test string 1 1
SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers \n LIMIT 1\n) \nANY LEFT JOIN \n(\n SELECT *\n FROM test.test \n WHERE date = toDate(\'2000-01-01\')\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\')
1 2000-01-01 test string 1 1

View File

@ -1,10 +1,8 @@
SET send_logs_level = 'none';
DROP TABLE IF EXISTS test.perf;
DROP TABLE IF EXISTS test.test;
DROP TABLE IF EXISTS test.test_view;
CREATE TABLE test.perf(site String, user_id UInt64, z Float64)ENGINE = Log;
CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192);
CREATE VIEW test.test_view AS SELECT * FROM test.test;
@ -12,71 +10,103 @@ INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1);
INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2);
SET enable_optimize_predicate_expression = 1;
SET enable_debug_queries = 1;
SELECT '-------Not need optimize predicate, but it works.-------';
SELECT '-------No need for predicate optimization, but still works-------';
SELECT 1;
SELECT 1 AS id WHERE id = 1;
SELECT arrayJoin([1,2,3]) AS id WHERE id = 1;
SELECT * FROM (SELECT perf_1.z AS z_1 FROM test.perf AS perf_1);
SELECT * FROM test.test WHERE id = 1;
SELECT '-------Need push down-------';
SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1;
SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1';
SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1;
SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT * FROM (SELECT perf_1.z AS z_1 FROM test.perf AS perf_1) WHERE z_1 = 1;
-- Optimize predicate expressions without tables
ANALYZE SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1;
SELECT * FROM system.one ANY LEFT JOIN (SELECT 0 AS dummy) USING dummy WHERE 1;
ANALYZE SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1';
SELECT toString(value) AS value FROM (SELECT 1 AS value) WHERE value = '1';
ANALYZE SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1;
SELECT * FROM (SELECT 1 AS id UNION ALL SELECT 2) WHERE id = 1;
ANALYZE SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
ANALYZE SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
ANALYZE SELECT * FROM (SELECT 1 AS id, (SELECT 1) as subquery) WHERE subquery = 1;
SELECT * FROM (SELECT 1 AS id, (SELECT 1) as subquery) WHERE subquery = 1;
-- Optimize predicate expressions using tables
ANALYZE SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3;
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3;
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3;
ANALYZE SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
SELECT date, id, name, value FROM (SELECT date, name, value, min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
ANALYZE SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3;
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3;
SELECT '-------Force push down-------';
SET force_primary_key = 1;
-- Optimize predicate expression with asterisk
ANALYZE SELECT * FROM (SELECT * FROM test.test) WHERE id = 1;
SELECT * FROM (SELECT * FROM test.test) WHERE id = 1;
-- Optimize predicate expression with asterisk and nested subquery
ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1;
SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1;
-- Optimize predicate expression with qualified asterisk
ANALYZE SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1;
SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1;
-- Optimize predicate expression without asterisk
ANALYZE SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1;
SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1;
-- Optimize predicate expression without asterisk and contains nested subquery
ANALYZE SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1;
SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1;
-- Optimize predicate expression with qualified
ANALYZE SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1;
SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1;
-- Optimize predicate expression with qualified and nested subquery
ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1;
SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1;
-- Optimize predicate expression with aggregate function
ANALYZE SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1;
SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1;
-- Optimize predicate expression with union all query
ANALYZE SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1;
SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1;
-- Optimize predicate expression with join query
ANALYZE SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1;
SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1;
ANALYZE SELECT * FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test USING id WHERE value = 1;
SELECT * FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test USING id WHERE value = 1;
-- FIXME: no support for aliased tables for now.
ANALYZE SELECT b.value FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test AS b USING id WHERE value = 1;
SELECT b.value FROM (SELECT toInt8(1) AS id) ANY LEFT JOIN test.test AS b USING id WHERE value = 1;
-- Optimize predicate expression with join and nested subquery
ANALYZE SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1;
SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1;
-- Optimize predicate expression with join query and qualified
SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.id = 1;
ANALYZE SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING id WHERE b.id = 1;
SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING id WHERE b.id = 1;
-- Optimize predicate expression with view
SELECT * FROM test.test_view WHERE id = 1;
SELECT * FROM test.test_view WHERE id = 2;
SELECT id FROM test.test_view WHERE id = 1;
SELECT s.id FROM test.test_view AS s WHERE s.id = 1;
SELECT '-------Push to having expression, need check.-------';
SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 }
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test) WHERE a = 3; -- { serverError 277 }
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3; -- { serverError 277 }
SELECT * FROM (SELECT toUInt64(b) AS a, sum(id) AS b FROM test.test AS table_alias) AS outer_table_alias WHERE outer_table_alias.b = 3; -- { serverError 277 }
SELECT '-------Compatibility test-------';
-- Compatibility test
ANALYZE SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01');
SELECT * FROM (SELECT toInt8(1) AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date, id WHERE b.date = toDate('2000-01-01');
DROP TABLE IF EXISTS test.perf;
DROP TABLE IF EXISTS test.test;
DROP TABLE IF EXISTS test.test_view;

View File

@ -0,0 +1 @@
SELECT \n a, \n b\nFROM test.a

View File

@ -0,0 +1,8 @@
set enable_debug_queries = 1;
DROP TABLE IF EXISTS test.a;
CREATE TABLE test.a (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a;
ANALYZE SELECT * FROM test.a;
DROP TABLE test.a;

View File

@ -0,0 +1,18 @@
DROP TABLE IF EXISTS test.test;
DROP TABLE IF EXISTS test.test_view;
CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192);
CREATE VIEW test.test_view AS SELECT * FROM test.test;
SET enable_optimize_predicate_expression = 1;
SET enable_debug_queries = 1;
-- Optimize predicate expression with view
-- TODO: simple view is not replaced with subquery inside syntax analyzer
ANALYZE SELECT * FROM test.test_view WHERE id = 1;
ANALYZE SELECT * FROM test.test_view WHERE id = 2;
ANALYZE SELECT id FROM test.test_view WHERE id = 1;
ANALYZE SELECT s.id FROM test.test_view AS s WHERE s.id = 1;
-- TODO: this query shouldn't work, because the name `toUInt64(sum(id))` is undefined for user
SELECT * FROM (SELECT toUInt64(b), sum(id) AS b FROM test.test) WHERE `toUInt64(sum(id))` = 3;