2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
2018-04-20 00:20:36 +00:00
|
|
|
#include <Storages/MergeTree/KeyCondition.h>
|
2019-01-25 15:42:24 +00:00
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTIdentifier.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTSubquery.h>
|
|
|
|
#include <Parsers/formatAST.h>
|
2019-01-22 12:33:56 +00:00
|
|
|
#include <Interpreters/QueryNormalizer.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/escapeForFileName.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2017-12-25 18:58:39 +00:00
|
|
|
#include <DataTypes/NestedUtils.h>
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/scope_guard.h>
|
2018-06-30 21:35:01 +00:00
|
|
|
#include <ext/collection_cast.h>
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/map.h>
|
2017-01-14 09:00:19 +00:00
|
|
|
#include <memory>
|
|
|
|
#include <unordered_map>
|
2019-02-14 19:59:51 +00:00
|
|
|
#include <tuple>
|
2017-01-14 09:00:19 +00:00
|
|
|
#include <cstddef>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-02-14 19:59:51 +00:00
|
|
|
/// Magnitude bound used by isConditionGood: an equality with a numeric literal is considered "good"
/// only when the literal lies outside this threshold.
static constexpr auto threshold = 2;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
|
|
|
|
|
|
|
MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
|
2017-07-15 03:48:36 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2017-07-14 00:33:37 +00:00
|
|
|
const Context & context,
|
|
|
|
const MergeTreeData & data,
|
|
|
|
const Names & column_names,
|
2017-04-01 07:20:54 +00:00
|
|
|
Logger * log)
|
2019-02-14 16:48:41 +00:00
|
|
|
: table_columns{ext::map<std::unordered_set>(data.getColumns().getAllPhysical(),
|
2017-12-25 21:57:29 +00:00
|
|
|
[] (const NameAndTypePair & col) { return col.name; })},
|
2018-11-08 17:28:52 +00:00
|
|
|
block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)},
|
2017-04-01 07:20:54 +00:00
|
|
|
log{log}
|
2017-01-14 09:00:19 +00:00
|
|
|
{
|
2019-02-14 16:48:41 +00:00
|
|
|
if (!data.primary_key_columns.empty())
|
|
|
|
first_primary_key_column = data.primary_key_columns[0];
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
calculateColumnSizes(data, column_names);
|
2017-07-15 03:48:36 +00:00
|
|
|
auto & select = typeid_cast<ASTSelectQuery &>(*query_info.query);
|
2017-04-01 07:20:54 +00:00
|
|
|
determineArrayJoinedNames(select);
|
|
|
|
optimize(select);
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Entry point: dispatches to the conjunction-aware or the whole-expression strategy.
void MergeTreeWhereOptimizer::optimize(ASTSelectQuery & select) const
{
    /// Nothing to move without WHERE; a PREWHERE that is already present is left untouched.
    if (select.prewhere_expression || !select.where_expression)
        return;

    auto * where_function = typeid_cast<ASTFunction *>(select.where_expression.get());
    const bool is_conjunction = where_function && where_function->name == "and";

    if (!is_conjunction)
    {
        optimizeArbitrary(select);
        return;
    }

    optimizeConjunction(select, where_function);
}
|
|
|
|
|
|
|
|
|
|
|
|
void MergeTreeWhereOptimizer::calculateColumnSizes(const MergeTreeData & data, const Names & column_names)
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto & column_name : column_names)
|
2019-02-14 19:59:51 +00:00
|
|
|
column_sizes[column_name] = data.getColumnCompressedSize(column_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
struct ConditionCandidate
|
|
|
|
{
|
|
|
|
size_t columns_size;
|
|
|
|
int64_t position;
|
|
|
|
IdentifierNameSet identifiers;
|
|
|
|
bool is_good;
|
|
|
|
|
|
|
|
auto tuple() const
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-02-14 19:59:51 +00:00
|
|
|
/// We'll move conditions from back to keep "position".
|
|
|
|
return std::forward_as_tuple(!is_good, columns_size, -position);
|
|
|
|
}
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2019-02-14 19:59:51 +00:00
|
|
|
bool operator< (const ConditionCandidate & rhs) const
|
|
|
|
{
|
|
|
|
return tuple() < rhs.tuple();
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-02-14 19:59:51 +00:00
|
|
|
};
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Handles WHERE of the form `and(c1, c2, ...)`.
  *
  * 1. Flattens nested conjunctions into the argument list of `fun`.
  * 2. Collects the arguments that may be moved (see cannotBeMoved, hasPrimaryKeyAtoms, isSubsetOfTableColumns).
  * 3. Moves the best candidate to PREWHERE, together with the candidates that follow it in sorted order
  *    and use exactly the same set of identifiers.
  * 4. Simplifies or removes the WHERE expression if one or zero arguments remain.
  *
  * `select` is the query being rewritten, `fun` is its WHERE expression (the "and" function).
  */
void MergeTreeWhereOptimizer::optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const
{
    std::vector<ConditionCandidate> condition_candidates;

    auto & conditions = fun->arguments->children;

    /// Remove a condition by swapping it with the last one and calling pop_back().
    /// Only the removed index and the last index are affected; all smaller indices stay valid.
    const auto remove_condition_at_index = [&conditions] (const size_t idx)
    {
        if (idx + 1 < conditions.size())
            std::swap(conditions[idx], conditions.back());
        conditions.pop_back();
    };

    /// Linearize the conjunction and extract the conditions that can be moved.
    size_t idx = 0;
    while (idx < conditions.size())
    {
        const auto condition = conditions[idx].get();

        /// Linearize sub-conjunctions.
        const auto function = typeid_cast<ASTFunction *>(condition);
        if (function && function->name == "and")
        {
            for (auto & child : function->arguments->children)
                conditions.emplace_back(std::move(child));

            /// Remove the element corresponding to the nested conjunction itself.
            /// The index is intentionally not advanced: the element swapped into `idx` is not processed yet.
            remove_condition_at_index(idx);
            continue;
        }

        if (!cannotBeMoved(conditions[idx]))
        {
            IdentifierNameSet identifiers;
            collectIdentifiersNoSubqueries(condition, identifiers);

            /// Do not take into consideration the conditions that contain atoms on the first primary key column.
            if (!hasPrimaryKeyAtoms(condition) && isSubsetOfTableColumns(identifiers))
            {
                ConditionCandidate candidate;
                candidate.position = idx;
                candidate.columns_size = getIdentifiersColumnSize(identifiers);
                candidate.is_good = isConditionGood(condition);
                candidate.identifiers = std::move(identifiers);
                condition_candidates.emplace_back(std::move(candidate));
            }
        }

        ++idx;
    }

    if (condition_candidates.empty())
        return;

    /// Adds one condition to PREWHERE, creating or extending an AND chain when needed.
    const auto append_to_prewhere = [&select] (const ASTPtr & condition)
    {
        if (!select.prewhere_expression)
        {
            select.prewhere_expression = condition;
            select.children.push_back(select.prewhere_expression);
            return;
        }

        auto * prewhere_and = typeid_cast<ASTFunction *>(select.prewhere_expression.get());
        if (prewhere_and && prewhere_and->name == "and")
        {
            /// Add argument to the existing AND chain.
            prewhere_and->arguments->children.emplace_back(condition);
            return;
        }

        /// Make `old_cond AND new_cond`. A freshly created ASTFunction has no argument list, so create it explicitly.
        auto conjunction = std::make_shared<ASTFunction>();
        conjunction->name = "and";
        conjunction->arguments = std::make_shared<ASTExpressionList>();
        conjunction->children.push_back(conjunction->arguments);
        conjunction->arguments->children = {select.prewhere_expression, condition};

        /// Replace only the PREWHERE entry among the children of SELECT; the other children must stay intact.
        const auto old_entry = std::find(select.children.begin(), select.children.end(), select.prewhere_expression);
        select.prewhere_expression = std::move(conjunction);
        if (old_entry != select.children.end())
            *old_entry = select.prewhere_expression;
        else
            select.children.push_back(select.prewhere_expression);
    };

    /// Lightest conditions first. NOTE The algorithm is suboptimal, replace with priority_queue if you want.
    std::sort(condition_candidates.begin(), condition_candidates.end());

    /// Pick the best condition and also the following conditions with the same set of columns.
    /// For example, if we take "EventTime >= '2014-03-20 00:00:00'", we will also take "EventTime < '2014-03-21 00:00:00'".
    const auto & identifiers_of_moved_condition = condition_candidates.front().identifiers;

    std::vector<size_t> positions_to_move;
    positions_to_move.push_back(static_cast<size_t>(condition_candidates.front().position));
    for (size_t i = 1, size = condition_candidates.size(); i < size; ++i)
    {
        if (!(identifiers_of_moved_condition == condition_candidates[i].identifiers))
            break;
        positions_to_move.push_back(static_cast<size_t>(condition_candidates[i].position));
    }

    /// Grab the conditions while all recorded positions are still valid, in priority order.
    std::vector<ASTPtr> moved_conditions;
    moved_conditions.reserve(positions_to_move.size());
    for (const auto position : positions_to_move)
        moved_conditions.push_back(conditions[position]);

    /// Remove from the highest index down: swap-and-pop then never disturbs an index that is still pending.
    std::sort(positions_to_move.begin(), positions_to_move.end());
    for (auto it = positions_to_move.rbegin(); it != positions_to_move.rend(); ++it)
        remove_condition_at_index(*it);

    for (const auto & moved_condition : moved_conditions)
        append_to_prewhere(moved_condition);

    /** Replace the conjunction with the only remaining argument if exactly one is left,
      * drop the WHERE expression completely if nothing is left.
      */
    if (conditions.size() == 1)
    {
        /// Find the old where_expression in the children of select.
        const auto it = std::find(std::begin(select.children), std::end(select.children), select.where_expression);
        /// Replace where_expression with the remaining argument.
        select.where_expression = std::move(conditions.front());
        /// Overwrite the child entry with the new where_expression.
        if (it != std::end(select.children))
            *it = select.where_expression;
    }
    else if (conditions.empty())
    {
        const auto it = std::find(std::begin(select.children), std::end(select.children), select.where_expression);
        if (it != std::end(select.children))
            select.children.erase(it);
        select.where_expression.reset();
    }

    if (select.prewhere_expression)
        LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"" << select.prewhere_expression << "\" moved to PREWHERE");
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Handles a WHERE expression that is not a conjunction: the expression is moved to PREWHERE as a whole or not at all.
void MergeTreeWhereOptimizer::optimizeArbitrary(ASTSelectQuery & select) const
{
    auto & where = select.where_expression;

    /// Restricted expressions are never moved.
    if (cannotBeMoved(where))
        return;

    IdentifierNameSet used_identifiers;
    collectIdentifiersNoSubqueries(where.get(), used_identifiers);

    const bool movable = !hasPrimaryKeyAtoms(where.get()) && isSubsetOfTableColumns(used_identifiers);
    if (!movable)
        return;

    /// PREWHERE takes over the expression, WHERE receives the previous value of PREWHERE.
    std::swap(select.prewhere_expression, where);
    LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition `" << select.prewhere_expression << "` moved to PREWHERE");
}
|
|
|
|
|
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t MergeTreeWhereOptimizer::getIdentifiersColumnSize(const IdentifierNameSet & identifiers) const
|
2017-01-14 09:00:19 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
/** for expressions containing no columns (or where columns could not be determined otherwise) assume maximum
|
|
|
|
* possible size so they do not have priority in eligibility over other expressions. */
|
|
|
|
if (identifiers.empty())
|
2017-07-21 06:35:58 +00:00
|
|
|
return std::numeric_limits<size_t>::max();
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t size{};
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto & identifier : identifiers)
|
|
|
|
if (column_sizes.count(identifier))
|
|
|
|
size += column_sizes.find(identifier)->second;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return size;
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeWhereOptimizer::isConditionGood(const IAST * condition) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const auto function = typeid_cast<const ASTFunction *>(condition);
|
|
|
|
if (!function)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/** we are only considering conditions of form `equals(one, another)` or `one = another`,
|
|
|
|
* especially if either `one` or `another` is ASTIdentifier */
|
2019-02-14 19:59:51 +00:00
|
|
|
if (function->name != "equals")
|
2017-04-01 07:20:54 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
auto left_arg = function->arguments->children.front().get();
|
|
|
|
auto right_arg = function->arguments->children.back().get();
|
|
|
|
|
|
|
|
/// try to ensure left_arg points to ASTIdentifier
|
2019-01-14 18:15:04 +00:00
|
|
|
if (!isIdentifier(left_arg) && isIdentifier(right_arg))
|
2017-04-01 07:20:54 +00:00
|
|
|
std::swap(left_arg, right_arg);
|
|
|
|
|
2019-01-14 18:15:04 +00:00
|
|
|
if (isIdentifier(left_arg))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
/// condition may be "good" if only right_arg is a constant and its value is outside the threshold
|
|
|
|
if (const auto literal = typeid_cast<const ASTLiteral *>(right_arg))
|
|
|
|
{
|
|
|
|
const auto & field = literal->value;
|
|
|
|
const auto type = field.getType();
|
|
|
|
|
|
|
|
/// check the value with respect to threshold
|
|
|
|
if (type == Field::Types::UInt64)
|
|
|
|
{
|
|
|
|
const auto value = field.get<UInt64>();
|
|
|
|
return value > threshold;
|
|
|
|
}
|
|
|
|
else if (type == Field::Types::Int64)
|
|
|
|
{
|
|
|
|
const auto value = field.get<Int64>();
|
|
|
|
return value < -threshold || threshold < value;
|
|
|
|
}
|
|
|
|
else if (type == Field::Types::Float64)
|
|
|
|
{
|
|
|
|
const auto value = field.get<Float64>();
|
|
|
|
return value < threshold || threshold < value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void MergeTreeWhereOptimizer::collectIdentifiersNoSubqueries(const IAST * const ast, IdentifierNameSet & set)
|
|
|
|
{
|
2019-01-14 18:15:04 +00:00
|
|
|
if (auto opt_name = getIdentifierName(ast))
|
|
|
|
return (void) set.insert(*opt_name);
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (typeid_cast<const ASTSubquery *>(ast))
|
|
|
|
return;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto & child : ast->children)
|
|
|
|
collectIdentifiersNoSubqueries(child.get(), set);
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeWhereOptimizer::hasPrimaryKeyAtoms(const IAST * ast) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
if (const auto func = typeid_cast<const ASTFunction *>(ast))
|
|
|
|
{
|
|
|
|
const auto & args = func->arguments->children;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if ((func->name == "not" && 1 == args.size()) || func->name == "and" || func->name == "or")
|
|
|
|
{
|
|
|
|
for (const auto & arg : args)
|
|
|
|
if (hasPrimaryKeyAtoms(arg.get()))
|
|
|
|
return true;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return isPrimaryKeyAtom(ast);
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const IAST * const ast) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
if (const auto func = typeid_cast<const ASTFunction *>(ast))
|
|
|
|
{
|
2018-04-20 00:20:36 +00:00
|
|
|
if (!KeyCondition::atom_map.count(func->name))
|
2017-04-01 07:20:54 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
const auto & args = func->arguments->children;
|
|
|
|
if (args.size() != 2)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
const auto & first_arg_name = args.front()->getColumnName();
|
|
|
|
const auto & second_arg_name = args.back()->getColumnName();
|
|
|
|
|
2019-02-14 16:48:41 +00:00
|
|
|
if ((first_primary_key_column == first_arg_name && isConstant(args[1]))
|
|
|
|
|| (first_primary_key_column == second_arg_name && isConstant(args[0]))
|
|
|
|
|| (first_primary_key_column == first_arg_name && functionIsInOrGlobalInOperator(func->name)))
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeWhereOptimizer::isConstant(const ASTPtr & expr) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const auto column_name = expr->getColumnName();
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (typeid_cast<const ASTLiteral *>(expr.get()) ||
|
2017-12-09 10:14:45 +00:00
|
|
|
(block_with_constants.has(column_name) && block_with_constants.getByName(column_name).column->isColumnConst()))
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return false;
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool MergeTreeWhereOptimizer::isSubsetOfTableColumns(const IdentifierNameSet & identifiers) const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto & identifier : identifiers)
|
|
|
|
if (table_columns.count(identifier) == 0)
|
|
|
|
return false;
|
2017-01-14 09:00:19 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-01-14 18:15:04 +00:00
|
|
|
bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const
|
2017-01-14 09:00:19 +00:00
|
|
|
{
|
2019-01-14 18:15:04 +00:00
|
|
|
if (const auto function_ptr = typeid_cast<const ASTFunction *>(ptr.get()))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
/// disallow arrayJoin expressions to be moved to PREWHERE for now
|
2019-02-14 19:59:51 +00:00
|
|
|
if ("arrayJoin" == function_ptr->name)
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
/// disallow GLOBAL IN, GLOBAL NOT IN
|
2019-02-14 19:59:51 +00:00
|
|
|
if ("globalIn" == function_ptr->name
|
|
|
|
|| "globalNotIn" == function_ptr->name)
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
/// indexHint is a special function that it does not make sense to transfer to PREWHERE
|
|
|
|
if ("indexHint" == function_ptr->name)
|
|
|
|
return true;
|
|
|
|
}
|
2019-01-25 15:42:24 +00:00
|
|
|
else if (auto opt_name = IdentifierSemantic::getColumnName(ptr))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
/// disallow moving result of ARRAY JOIN to PREWHERE
|
2019-01-14 18:15:04 +00:00
|
|
|
if (array_joined_names.count(*opt_name) ||
|
|
|
|
array_joined_names.count(Nested::extractTableName(*opt_name)))
|
|
|
|
return true;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto & child : ptr->children)
|
2019-01-14 18:15:04 +00:00
|
|
|
if (cannotBeMoved(child))
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
2017-01-14 09:00:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Records the alias (or column name) of every expression in the ARRAY JOIN list of the query, if there is one.
/// Much simplified code from ExpressionAnalyzer::getArrayJoinedColumns().
void MergeTreeWhereOptimizer::determineArrayJoinedNames(ASTSelectQuery & select)
{
    const auto array_join_list = select.array_join_expression_list();

    if (array_join_list)
    {
        for (const auto & element : array_join_list->children)
            array_joined_names.emplace(element->getAliasOrColumnName());
    }
}
|
|
|
|
|
|
|
|
}
|