Merge branch 'master' into generic-case-for-function-if

This commit is contained in:
Alexey Milovidov 2019-02-03 16:01:56 +03:00
commit 2129094ba3
15 changed files with 217 additions and 72 deletions

View File

@ -151,6 +151,8 @@ public:
#endif
/// Tells whether the function is stateful. The default answer is "no".
virtual bool isStateful() const
{
    return false;
}
/** Should we evaluate this function while constant folding, if arguments are constants?
* Usually this is true. Notable counterexample is function 'sleep'.
* If we call it during query analysis, we will sleep for an extra amount of time.
@ -230,6 +232,9 @@ public:
/// Get the main function name.
virtual String getName() const = 0;
/// Returns false by default. Override and return true when the function
/// needs to depend on the state of the data.
virtual bool isStateful() const
{
    return false;
}
/// Override and return true if function could take different number of arguments.
virtual bool isVariadic() const { return false; }
@ -322,6 +327,9 @@ class IFunction : public std::enable_shared_from_this<IFunction>,
{
public:
String getName() const override = 0;
/// Reports the function as not stateful unless a derived class overrides this.
bool isStateful() const override
{
    return false;
}
/// TODO: make const
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override = 0;
@ -478,6 +486,7 @@ public:
}
String getName() const override { return function->getName(); }
/// Forwards the question to the wrapped function.
bool isStateful() const override
{
    return function->isStateful();
}
bool isVariadic() const override { return function->isVariadic(); }
size_t getNumberOfArguments() const override { return function->getNumberOfArguments(); }

View File

@ -27,6 +27,11 @@ public:
return name;
}
/// This function is stateful.
bool isStateful() const override { return true; }
size_t getNumberOfArguments() const override
{
return 0;

View File

@ -33,6 +33,11 @@ public:
return name;
}
/// This function is stateful.
bool isStateful() const override { return true; }
size_t getNumberOfArguments() const override
{
return 1;

View File

@ -27,6 +27,11 @@ public:
return name;
}
/// This function is stateful.
bool isStateful() const override { return true; }
size_t getNumberOfArguments() const override
{
return 0;

View File

@ -22,6 +22,11 @@ public:
return name;
}
/// This function is stateful.
bool isStateful() const override { return true; }
size_t getNumberOfArguments() const override
{
return 0;

View File

@ -41,6 +41,11 @@ public:
return name;
}
/// This function is stateful.
bool isStateful() const override { return true; }
size_t getNumberOfArguments() const override
{
return 1;

View File

@ -130,6 +130,11 @@ public:
return name;
}
/// This function is stateful.
bool isStateful() const override { return true; }
size_t getNumberOfArguments() const override
{
return 1;

View File

@ -0,0 +1,16 @@
#include <Interpreters/ExtractFunctionDataVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
/// Files the visited function node into one of two lists: a name known to the
/// aggregate function factory goes to aggregate_functions, anything else to functions.
void ExtractFunctionData::visit(ASTFunction & function, ASTPtr &)
{
    const bool is_aggregate = AggregateFunctionFactory::instance().isAggregateFunctionName(function.name);
    auto & destination = is_aggregate ? aggregate_functions : functions;
    destination.emplace_back(&function);
}
}

View File

@ -0,0 +1,25 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
/// Visitor payload that collects non-owning pointers to the ASTFunction nodes it is shown,
/// split by whether the function name is an aggregate function name.
struct ExtractFunctionData
{
    using TypeToVisit = ASTFunction;

    /// Visited functions whose name is not an aggregate function name.
    std::vector<ASTFunction *> functions;
    /// Visited functions whose name is an aggregate function name.
    std::vector<ASTFunction *> aggregate_functions;

    /// Appends `function` to one of the two lists above. The second argument is not used.
    void visit(ASTFunction & function, ASTPtr &);
};

using ExtractFunctionMatcher = OneTypeMatcher<ExtractFunctionData>;
using ExtractFunctionVisitor = InDepthNodeVisitor<ExtractFunctionMatcher, true>;
}

View File

@ -0,0 +1,39 @@
#include <Interpreters/FindIdentifierBestTableVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
namespace DB
{
/// Binds the `tables` member to tables_. The member is a const reference, so no copy is made
/// and the vector passed in has to stay alive for as long as this object is used.
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_)
: tables(tables_)
{
}
/// Chooses the table the identifier fits best and appends the (identifier, table)
/// pair to identifier_table. The table pointer stays nullptr when nothing fits.
void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)
{
    const DatabaseAndTableWithAlias * chosen = nullptr;

    if (identifier.compound())
    {
        /// Compound name: keep the table with the strictly highest non-zero score,
        /// so the earliest table wins a tie.
        size_t top_score = 0;
        for (const auto & candidate : tables)
        {
            const size_t score = IdentifierSemantic::canReferColumnToTable(identifier, candidate);
            if (score > top_score)
            {
                top_score = score;
                chosen = &candidate;
            }
        }
    }
    else if (!tables.empty())
    {
        /// Simple name: fall back to the first table, if there is one.
        chosen = &tables.front();
    }

    identifier_table.emplace_back(&identifier, chosen);
}
}

View File

@ -0,0 +1,24 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
/// Visitor payload that pairs each visited ASTIdentifier with a pointer to one of the
/// known tables (or nullptr).
struct FindIdentifierBestTableData
{
using TypeToVisit = ASTIdentifier;
/// Candidate tables. Held by reference, not copied, so the vector must outlive this object.
const std::vector<DatabaseAndTableWithAlias> & tables;
/// One entry per visited identifier: the identifier and the table selected for it.
/// Both pointers are non-owning.
std::vector<std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>> identifier_table;
/// Stores a reference to tables_.
FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);
/// Appends an entry for `identifier` to identifier_table. The second argument is unnamed.
void visit(ASTIdentifier & identifier, ASTPtr &);
};
using FindIdentifierBestTableMatcher = OneTypeMatcher<FindIdentifierBestTableData>;
using FindIdentifierBestTableVisitor = InDepthNodeVisitor<FindIdentifierBestTableMatcher, true>;
}

View File

@ -20,7 +20,10 @@
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include "TranslateQualifiedNamesVisitor.h"
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/FindIdentifierBestTableVisitor.h>
#include <Interpreters/ExtractFunctionDataVisitor.h>
#include <Functions/FunctionFactory.h>
namespace DB
{
@ -33,65 +36,13 @@ namespace ErrorCodes
static constexpr auto and_function_name = "and";
struct FindIdentifierBestTableData
{
using TypeToVisit = ASTIdentifier;
const std::vector<DatabaseAndTableWithAlias> & tables;
std::vector<std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>> identifier_table;
FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_)
: tables(tables_)
{}
void visit(ASTIdentifier & identifier, ASTPtr &)
{
const DatabaseAndTableWithAlias * best_table = nullptr;
if (!identifier.compound())
{
if (!tables.empty())
best_table = &tables[0];
}
else
{
size_t best_match = 0;
for (const DatabaseAndTableWithAlias & table : tables)
{
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table))
if (match > best_match)
{
best_match = match;
best_table = &table;
}
}
}
identifier_table.emplace_back(&identifier, best_table);
}
};
using FindIdentifierBestTableMatcher = OneTypeMatcher<FindIdentifierBestTableData>;
using FindIdentifierBestTableVisitor = InDepthNodeVisitor<FindIdentifierBestTableMatcher, true>;
/// A subquery accepts pushed-down predicates only when it exists and has none of
/// FINAL, LIMIT BY, LIMIT or WITH.
static bool allowPushDown(const ASTSelectQuery * subquery)
{
    if (!subquery)
        return false;

    const bool blocked = subquery->final()
        || subquery->limit_by_expression_list
        || subquery->limit_length
        || subquery->with_expression_list;

    return !blocked;
}
/// Stores the query to optimize, the extracted settings and the context.
/// NOTE(review): settings_ is taken by rvalue reference but, being a named parameter, it is an
/// lvalue inside the initializer list; `settings` is therefore initialized from it without std::move.
PredicateExpressionsOptimizer::PredicateExpressionsOptimizer(
ASTSelectQuery * ast_select_, ExtractedSettings && settings_, const Context & context_)
: ast_select(ast_select_), settings(settings_), context(context_)
{
}
bool PredicateExpressionsOptimizer::optimize()
{
if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty())
@ -158,6 +109,27 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
return is_rewrite_subquery;
}
/// Decides whether outer predicates may be pushed into `subquery`.
///
/// Returns false when `subquery` is null or uses FINAL, LIMIT BY, LIMIT or WITH.
/// Otherwise returns false if any non-aggregate function found in the select expression list
/// of ast_select is stateful, and true in every other case.
bool PredicateExpressionsOptimizer::allowPushDown(const ASTSelectQuery * subquery)
{
    if (!subquery
        || subquery->final()
        || subquery->limit_by_expression_list
        || subquery->limit_length
        || subquery->with_expression_list)
        return false;

    /// The functions are collected from the select list of ast_select (the query this
    /// optimizer was constructed for), not from `subquery`.
    ASTPtr expr_list = ast_select->select_expression_list;
    ExtractFunctionVisitor::Data extract_data;
    ExtractFunctionVisitor(extract_data).visit(expr_list);

    for (const auto * extracted_function : extract_data.functions)
    {
        /// tryGet rather than get: some ASTFunction names (lambda, tuple and other special forms)
        /// are not registered in the factory, and merely inspecting them must not throw.
        const auto function = FunctionFactory::instance().tryGet(extracted_function->name, context);

        if (function && function->isStateful())
            return false;
    }

    return true;
}
std::vector<ASTPtr> PredicateExpressionsOptimizer::splitConjunctionPredicate(ASTPtr & predicate_expression)
{
std::vector<ASTPtr> predicate_expressions;
@ -236,7 +208,11 @@ bool PredicateExpressionsOptimizer::canPushDownOuterPredicate(
if (alias == qualified_name)
{
is_found = true;
if (isAggregateFunction(ast))
ASTPtr projection_column = ast;
ExtractFunctionVisitor::Data extract_data;
ExtractFunctionVisitor(extract_data).visit(projection_column);
if (!extract_data.aggregate_functions.empty())
optimize_kind = OptimizeKind::PUSH_TO_HAVING;
}
}
@ -284,21 +260,6 @@ bool PredicateExpressionsOptimizer::isArrayJoinFunction(const ASTPtr & node)
return false;
}
/// Returns true if `node` itself, or any node below it, is a function whose name
/// is an aggregate function name. The tree is walked depth-first.
bool PredicateExpressionsOptimizer::isAggregateFunction(const ASTPtr & node)
{
    const auto * function = typeid_cast<const ASTFunction *>(node.get());
    if (function && AggregateFunctionFactory::instance().isAggregateFunctionName(function->name))
        return true;

    for (const auto & child : node->children)
    {
        if (isAggregateFunction(child))
            return true;
    }

    return false;
}
bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery)
{
ASTPtr new_subquery_expression = subquery_expression;

View File

@ -65,8 +65,6 @@ private:
PUSH_TO_HAVING,
};
bool isAggregateFunction(const ASTPtr & node);
bool isArrayJoinFunction(const ASTPtr & node);
std::vector<ASTPtr> splitConjunctionPredicate(ASTPtr & predicate_expression);
@ -78,6 +76,8 @@ private:
bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
/// Returns true if predicates may be pushed into `subquery`: it must be non-null, must not use
/// FINAL, LIMIT BY, LIMIT or WITH, and no stateful function may be found in the inspected select list.
bool allowPushDown(const ASTSelectQuery * subquery);
bool canPushDownOuterPredicate(const std::vector<ProjectionWithAlias> & subquery_projection_columns,
const std::vector<IdentifierWithQualifier> & outer_predicate_dependencies,
OptimizeKind & optimize_kind);

View File

@ -0,0 +1,9 @@
-------ENABLE OPTIMIZE PREDICATE-------
2000-01-01 1 test string 1 1 1
2000-01-01 1 test string 1 1 1
1
-------FORCE PRIMARY KEY-------
-------CHECK STATEFUL FUNCTIONS-------
1 a 0
2 b 0
2 a 0

View File

@ -0,0 +1,32 @@
-- Checks that predicates are not pushed into subqueries that use FINAL, LIMIT or LIMIT BY,
-- nor below a stateful function.
SET send_logs_level = 'none';
DROP TABLE IF EXISTS test.test;
CREATE TABLE test.test(date Date, id Int8, name String, value Int64, sign Int8) ENGINE = CollapsingMergeTree(sign) ORDER BY (id, date);
INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1, 1);
INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2, 1);
SET enable_optimize_predicate_expression = 1;
-- With the optimization enabled, the three queries below must still return correct rows.
SELECT '-------ENABLE OPTIMIZE PREDICATE-------';
SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1;
SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1;
SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1;
SET force_primary_key = 1;
-- With force_primary_key = 1 each query is expected to fail with server error 277.
-- NOTE(review): presumably 277 is the "index not used" error, which would mean `id = 1`
-- never reached the table scan - confirm.
SELECT '-------FORCE PRIMARY KEY-------';
SELECT * FROM (SELECT * FROM test.test FINAL) WHERE id = 1; -- { serverError 277 }
SELECT * FROM (SELECT * FROM test.test LIMIT 1) WHERE id = 1; -- { serverError 277 }
SELECT * FROM (SELECT id FROM test.test GROUP BY id LIMIT 1 BY id) WHERE id = 1; -- { serverError 277 }
-- The middle subquery computes runningDifferenceStartingWithFirstValue; the outer filter
-- `changed = 0` is applied to its result.
SELECT '-------CHECK STATEFUL FUNCTIONS-------';
SELECT n, z, changed FROM (
SELECT n, z, runningDifferenceStartingWithFirstValue(n) AS changed FROM (
SELECT ts, n,z FROM system.one ARRAY JOIN [1,3,4,5,6] AS ts,
[1,2,2,2,1] AS n, ['a', 'a', 'b', 'a', 'b'] AS z
ORDER BY n, ts DESC
)
) WHERE changed = 0;
DROP TABLE IF EXISTS test.test;