This commit is contained in:
sundy-li 2020-12-13 00:42:15 +08:00
parent 20f0d39645
commit 9407028540
22 changed files with 334 additions and 164 deletions

View File

@ -364,6 +364,7 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_alias_column_prediction, true, "If it is set to true, it will rewrite the filter query with aliased columns, this could help with partition prune and secondary indexes. And also help with optimize_aggregation_in_order and optimize_read_in_order", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \

View File

@ -1,5 +1,6 @@
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -8,43 +9,90 @@
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>
namespace DB
{
bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
if (const auto * f = node->as<ASTFunction>())
{
/// "lambda" visit children itself.
if (f->name == "lambda")
return false;
}
return !(node->as<ASTTableExpression>()
|| node->as<ASTSubquery>()
|| node->as<ASTArrayJoin>()
|| node->as<ASTSelectQuery>()
|| node->as<ASTSelectWithUnionQuery>());
}
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
auto aa = queryToString(ast);
// If it's select query, only replace filters.
if (auto * query = ast->as<ASTSelectQuery>())
{
if (query->where())
Visitor(data).visit(query->refWhere());
if (query->prewhere())
Visitor(data).visit(query->refPrewhere());
return;
}
if (auto * node = ast->as<ASTFunction>())
{
visit(*node, ast, data);
return;
}
if (auto * node = ast->as<ASTIdentifier>())
{
if (auto column_name = IdentifierSemantic::getColumnName(*node))
{
if (const auto column_default = data.columns.getDefault(*column_name))
visit(*node, ast, data);
return;
}
}
void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
{
/// Do not add formal parameters of the lambda expression
if (node.name == "lambda")
{
Names local_aliases;
for (const auto & name : RequiredSourceColumnsMatcher::extractNamesFromLambda(node))
if (data.private_aliases.insert(name).second)
{
if (column_default->kind == ColumnDefaultKind::Alias)
{
const auto alias_columns = data.columns.getAliases();
for (const auto & alias_column : alias_columns)
{
if (alias_column.name == *column_name)
{
ast = addTypeConversionToAST(column_default->expression->clone(), alias_column.type->getName());
//revisit ast to track recursive alias columns
Visitor(data).visit(ast);
break;
}
}
}
local_aliases.push_back(name);
}
/// visit child with masked local aliases
Visitor(data).visit(node.arguments->children[1]);
for (const auto & name : local_aliases)
data.private_aliases.erase(name);
}
}
/// Replaces an identifier that names an ALIAS column with the expression that defines the alias.
///
/// `node` is the identifier being inspected and `ast` is the owning pointer that is overwritten
/// when a replacement happens. Identifiers whose column name is listed in `data.forbidden_columns`
/// or `data.private_aliases` are left untouched.
/// NOTE(review): `data.columns.get()` is called without checking that the column exists;
/// confirm how it behaves for names that are not table columns.
void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
    const auto column_name = IdentifierSemantic::getColumnName(node);
    if (!column_name)
        return;

    if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name))
        return;

    const auto & col = data.columns.get(*column_name);
    if (col.default_desc.kind != ColumnDefaultKind::Alias)
        return;

    /// Substitute a clone of the alias expression, converted to the declared column type.
    ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);

    /// Revisit the substituted expression to resolve aliases that refer to other aliases.
    Visitor(data).visit(ast);
}
}

View File

@ -1,6 +1,6 @@
#pragma once
#include <Interpreters/Aliases.h>
#include <Core/Names.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Storages/ColumnsDescription.h>
@ -10,6 +10,8 @@ namespace DB
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
class IDataType;
class ASTFunction;
class ASTIdentifier;
using DataTypePtr = std::shared_ptr<const IDataType>;
/// Visits AST node to rewrite alias columns in filter query
@ -22,14 +24,24 @@ public:
struct Data
{
const ColumnsDescription & columns;
const NameSet & forbidden_columns;
const Context & context;
Data(const ColumnsDescription & columns_)
NameSet private_aliases;
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)
: columns(columns_)
, forbidden_columns(forbidden_columns_)
, context(context_)
{}
};
static void visit(ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
private:
static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data);
static void visit(ASTFunction & node, ASTPtr & ast, Data & data);
};
using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor;

View File

@ -21,7 +21,6 @@
#include <Interpreters/InterpreterSetQuery.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/convertFieldToType.h>
#include <Interpreters/replaceAliasColumnsInFilter.h>
#include <Interpreters/addTypeConversionToAST.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/getTableExpressions.h>
@ -31,7 +30,7 @@
#include <Interpreters/JoinSwitcher.h>
#include <Interpreters/JoinedTables.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Processors/Pipe.h>
#include <Processors/Sources/SourceFromInputStream.h>
@ -1183,9 +1182,10 @@ void InterpreterSelectQuery::executeFetchColumns(
else // It's possible to optimize count() given only partition predicates
{
SelectQueryInfo temp_query_info;
temp_query_info.query = replaceAliasColumnsInFilter(query_ptr->clone(), storage->getInMemoryMetadata().getColumns());
temp_query_info.query = query_ptr;
temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
temp_query_info.sets = query_analyzer->getPreparedSets();
num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
}
if (num_rows)
@ -1292,11 +1292,10 @@ void InterpreterSelectQuery::executeFetchColumns(
if (is_alias)
{
auto column_decl = storage_columns.get(column);
/// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer).
column_expr = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName());
column_expr = column_default->expression->clone();
// recursive visit for alias to alias
ColumnAliasesVisitor::Data data(storage_columns);
ColumnAliasesVisitor(data).visit(column_expr);
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context);
column_expr = setAlias(column_expr, column);
}
else
@ -1509,7 +1508,7 @@ void InterpreterSelectQuery::executeFetchColumns(
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, *context);
}
StreamLocalLimits limits;

View File

@ -18,6 +18,7 @@
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/TreeOptimizer.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -367,6 +368,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
}
}
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
/// There can not be aggregate functions inside the WHERE and PREWHERE.
@ -512,8 +514,8 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column)
== partition_source_columns.end())
{
optimize_trivial_count = false;
break;
optimize_trivial_count = false;
break;
}
}
}
@ -591,6 +593,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
required_source_columns.swap(source_columns);
}
/// Builds a set containing every key of `array_join_result_to_source`.
NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const
{
    NameSet names;

    for (const auto & result_and_source : array_join_result_to_source)
    {
        names.insert(result_and_source.first);
    }

    return names;
}
TreeRewriterResultPtr TreeRewriter::analyzeSelect(
ASTPtr & query,
@ -654,6 +663,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
/// Rewrite filters of the select query; this must happen after getArrayJoinedColumns.
if (settings.optimize_alias_column_prediction && result.metadata_snapshot)
{
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);
}
result.aggregates = getAggregates(query, *select_query);
result.collectUsedColumns(query, true);
result.ast_join = select_query->join();
@ -702,7 +717,7 @@ TreeRewriterResultPtr TreeRewriter::analyze(
else
assertNoAggregates(query, "in wrong place");
result.collectUsedColumns(query, false);
result.collectUsedColumns(query ,false);
return std::make_shared<const TreeRewriterResult>(result);
}

View File

@ -71,6 +71,7 @@ struct TreeRewriterResult
void collectSourceColumns(bool add_special);
void collectUsedColumns(const ASTPtr & query, bool is_select);
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
NameSet getArrayJoinSourceNameSet() const;
const Scalars & getScalars() const { return scalars; }
};

View File

@ -4,11 +4,20 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTWithAlias.h>
#include <Storages/ColumnsDescription.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
namespace DB
{
namespace ErrorCodes
{
extern const int THERE_IS_NO_DEFAULT_VALUE;
}
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
{
auto func = makeASTFunction("CAST", ast, std::make_shared<ASTLiteral>(type_name));
@ -23,4 +32,23 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
return func;
}
/// Returns `ast` converted to `type_name`, adding a CAST only when it is actually needed.
///
/// The expression is analyzed against `all_columns` to find its result type. If the name of that
/// type equals `type_name`, the expression is returned as is; otherwise it is wrapped by the
/// two-argument overload. Throws THERE_IS_NO_DEFAULT_VALUE if evaluating the expression would
/// require an ARRAY JOIN action.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context)
{
    auto analysis_result = TreeRewriter(context).analyze(ast, all_columns);
    const auto expression_actions = ExpressionAnalyzer(ast, analysis_result, context).getActions(true);

    for (const auto & step : expression_actions->getActions())
    {
        if (step.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
            throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
    }

    auto sample_block = expression_actions->getSampleBlock();
    auto actual_type = sample_block.getByName(ast->getColumnName()).type;

    /// The expression already has the requested type: hand it back without a CAST.
    if (actual_type->getName() == type_name)
        return std::move(ast);

    return addTypeConversionToAST(std::move(ast), type_name);
}
}

View File

@ -2,6 +2,7 @@
#include <common/types.h>
#include <Parsers/IAST_fwd.h>
#include <Interpreters/Context.h>
namespace DB
@ -10,4 +11,7 @@ namespace DB
/// It will produce an expression with CAST to get an AST with the required type.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name);
/// Same as above, but the CAST wrapper is omitted when the expression already has the required type.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context);
}

View File

@ -1,22 +0,0 @@
#include <Interpreters/replaceAliasColumnsInFilter.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Storages/ColumnsDescription.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
/// Runs ColumnAliasesVisitor over the WHERE and PREWHERE expressions of a SELECT query.
/// `ast` must be an ASTSelectQuery; it is modified in place and then returned.
ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns)
{
    auto & select_query = ast->as<ASTSelectQuery &>();

    ColumnAliasesVisitor::Data visitor_data(columns);
    ColumnAliasesVisitor visitor(visitor_data);

    /// Only the filter clauses are visited.
    if (select_query.where())
    {
        visitor.visit(select_query.refWhere());
    }

    if (select_query.prewhere())
    {
        visitor.visit(select_query.refPrewhere());
    }

    return std::move(ast);
}
}

View File

@ -1,12 +0,0 @@
#pragma once
#include <common/types.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class ColumnsDescription;
ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns);
}

View File

@ -0,0 +1,16 @@
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Storages/ColumnsDescription.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
/// Runs ColumnAliasesVisitor over `ast` in place.
/// `columns`, `forbidden_columns` and `context` are passed through to the visitor's Data.
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context)
{
    ColumnAliasesVisitor::Data visitor_data(columns, forbidden_columns, context);
    ColumnAliasesVisitor(visitor_data).visit(ast);
}
}

View File

@ -0,0 +1,14 @@
#pragma once
#include <common/types.h>
#include <Core/Names.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class ColumnsDescription;
class Context;
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context);
}

View File

@ -153,7 +153,7 @@ SRCS(
interpretSubquery.cpp
join_common.cpp
loadMetadata.cpp
replaceAliasColumnsInFilter.cpp
replaceAliasColumnsInQuery.cpp
sortBlock.cpp
)

View File

@ -50,6 +50,8 @@ public:
ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); }
ASTPtr & refWhere() { return getExpression(Expression::WHERE); }
ASTPtr & refHaving() { return getExpression(Expression::HAVING); }
ASTPtr & refOrderBy() { return getExpression(Expression::ORDER_BY); }
ASTPtr & refGroupBy() { return getExpression(Expression::GROUP_BY); }
const ASTPtr with() const { return getExpression(Expression::WITH); }
const ASTPtr select() const { return getExpression(Expression::SELECT); }

View File

@ -22,8 +22,6 @@
#include <Parsers/ASTSampleRatio.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ColumnAliasesVisitor.h>
#include <Interpreters/replaceAliasColumnsInFilter.h>
#include <Interpreters/Context.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/QueryPlan/QueryPlan.h>
@ -211,14 +209,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
const auto & primary_key = metadata_snapshot->getPrimaryKey();
Names primary_key_columns = primary_key.column_names;
// query_info_for_index is a cloned SelectQueryInfo just for index
SelectQueryInfo query_info_for_index = query_info;
if (!metadata_snapshot->getColumns().getAliases().empty())
{
query_info_for_index.query = replaceAliasColumnsInFilter(query_info.query->clone(), metadata_snapshot->getColumns());
}
KeyCondition key_condition(query_info_for_index, context, primary_key_columns, primary_key.expression);
KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression);
if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
{
@ -230,8 +221,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
std::optional<PartitionPruner> partition_pruner;
if (data.minmax_idx_expr)
{
minmax_idx_condition.emplace(query_info_for_index, context, data.minmax_idx_columns, data.minmax_idx_expr);
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info_for_index, context, false /* strict */);
minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */);
if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless()))
{
@ -295,6 +286,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
RelativeSize relative_sample_offset = 0;
const auto & select = query_info.query->as<ASTSelectQuery &>();
auto select_sample_size = select.sampleSize();
auto select_sample_offset = select.sampleOffset();
@ -567,7 +559,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
for (const auto & index : metadata_snapshot->getSecondaryIndices())
{
auto index_helper = MergeTreeIndexFactory::instance().get(index);
auto condition = index_helper->createIndexCondition(query_info_for_index, context);
auto condition = index_helper->createIndexCondition(query_info, context);
if (!condition->alwaysUnknownOrTrue())
useful_indices.emplace_back(index_helper, condition);
}

View File

@ -1,7 +1,9 @@
#include <Storages/ReadInOrderOptimizer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Functions/IFunction.h>
namespace DB
@ -26,11 +28,10 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
/// Do not analyze joined columns.
/// They may have aliases and come to description as is.
/// We can mismatch them with order key columns at stage of fetching columns.
for (const auto & elem : syntax_result->array_join_result_to_source)
forbidden_columns.insert(elem.first);
forbidden_columns = syntax_result->getArrayJoinSourceNameSet();
}
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot) const
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const
{
Names sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
if (!metadata_snapshot->hasSortingKey())
@ -40,6 +41,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
int read_direction = required_sort_description.at(0).direction;
size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size());
auto aliase_columns = metadata_snapshot->getColumns().getAliases();
for (size_t i = 0; i < prefix_size; ++i)
{
@ -48,60 +50,92 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
/// Optimize in case of exact match with order key element
/// or in some simple cases when order key element is wrapped into monotonic function.
int current_direction = required_sort_description[i].direction;
if (required_sort_description[i].column_name == sorting_key_columns[i] && current_direction == read_direction)
order_key_prefix_descr.push_back(required_sort_description[i]);
else
auto apply_order_judge = [&] (const ExpressionActions::Actions & actions, const String & sort_column)
{
/// Allow only one simple monotonic functions with one argument
bool found_function = false;
for (const auto & action : elements_actions[i]->getActions())
int current_direction = required_sort_description[i].direction;
/// For the path: order by (sort_column, ...)
if (sort_column == sorting_key_columns[i] && current_direction == read_direction)
{
if (action.node->type != ActionsDAG::ActionType::FUNCTION)
continue;
if (found_function)
{
current_direction = 0;
break;
}
else
found_function = true;
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i])
{
current_direction = 0;
break;
}
const auto & func = *action.node->function_base;
if (!func.hasInformationAboutMonotonicity())
{
current_direction = 0;
break;
}
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
if (!monotonicity.is_monotonic)
{
current_direction = 0;
break;
}
else if (!monotonicity.is_positive)
current_direction *= -1;
return true;
}
/// For the path: order by (function(sort_column), ...)
/// Allow only one simple monotonic functions with one argument
/// Why not allow multi monotonic functions?
else
{
bool found_function = false;
if (!found_function)
current_direction = 0;
for (const auto & action : actions)
{
if (action.node->type != ActionsDAG::ActionType::FUNCTION)
{
continue;
}
if (!current_direction || (i > 0 && current_direction != read_direction))
break;
if (found_function)
{
current_direction = 0;
break;
}
else
found_function = true;
if (i == 0)
read_direction = current_direction;
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i])
{
current_direction = 0;
break;
}
order_key_prefix_descr.push_back(required_sort_description[i]);
const auto & func = *action.node->function_base;
if (!func.hasInformationAboutMonotonicity())
{
current_direction = 0;
break;
}
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
if (!monotonicity.is_monotonic)
{
current_direction = 0;
break;
}
else if (!monotonicity.is_positive)
current_direction *= -1;
}
if (!found_function)
current_direction = 0;
if (!current_direction || (i > 0 && current_direction != read_direction))
return false;
if (i == 0)
read_direction = current_direction;
return true;
}
};
const auto & actions = elements_actions[i]->getActions();
bool ok;
/// check if it's alias column
/// currently we only support alias column without any function wrapper
if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name))
{
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context);
auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll());
const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true);
const auto & alias_actions = expression_analyzer->getActions();
ok = apply_order_judge(alias_actions, column_expr->getColumnName());
}
else
ok = apply_order_judge(actions, required_sort_description[i].column_name);
if (ok)
order_key_prefix_descr.push_back(required_sort_description[i]);
}
if (order_key_prefix_descr.empty())

View File

@ -12,6 +12,8 @@ namespace DB
* common prefix, which is needed for
* performing reading in order of PK.
*/
class Context;
class ReadInOrderOptimizer
{
public:
@ -20,7 +22,7 @@ public:
const SortDescription & required_sort_description,
const TreeRewriterResultPtr & syntax_result);
InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot) const;
InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const;
private:
/// Actions for every element of order expression to analyze functions for monotonicity
@ -28,5 +30,4 @@ private:
NameSet forbidden_columns;
SortDescription required_sort_description;
};
}

View File

@ -195,7 +195,7 @@ void StorageBuffer::read(
if (dst_has_same_structure)
{
if (query_info.order_optimizer)
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot, context);
/// The destination table has the same structure of the requested columns and we can simply read blocks from there.
destination->read(

View File

@ -136,7 +136,7 @@ void StorageMaterializedView::read(
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
if (query_info.order_optimizer)
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
storage->read(query_plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);

View File

@ -211,7 +211,7 @@ Pipe StorageMerge::read(
{
auto storage_ptr = std::get<0>(*it);
auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot);
auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, context);
if (it == selected_tables.begin())
input_sorting_info = current_info;
else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))

View File

@ -15,6 +15,20 @@ alias2alias
1
1
1
second_index
array-join
1
0 0
lambda
1
optimize_read_in_order
2020-01-01
optimize_aggregation_in_order
2020-01-01 10
2020-01-02 10
2020-01-03 10
2020-01-01 10
2020-01-02 10
2020-01-03 10
second-index
1
1

View File

@ -1,5 +1,5 @@
DROP TABLE IF EXISTS table_with_alias_column;
CREATE TABLE table_with_alias_column
DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
`timestamp` DateTime,
`value` UInt64,
@ -13,22 +13,21 @@ PARTITION BY toYYYYMMDD(timestamp)
ORDER BY timestamp SETTINGS index_granularity = 1;
INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10);
INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10);
INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10);
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10);
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10);
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10);
set optimize_alias_column_prediction = 1;
SELECT 'test-partition-prune';
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;
SELECT t = '2020-01-03' FROM (SELECT day as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11;
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day, day as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;
SELECT t = '2020-01-03' FROM (SELECT day AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11;
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day, day AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
SELECT 'test-join';
SELECT day = '2020-01-03'
FROM
(
@ -38,39 +37,61 @@ FROM
INNER JOIN
(
SELECT day
FROM table_with_alias_column
FROM test_table
WHERE day = '2020-01-03'
GROUP BY day SETTINGS max_rows_to_read = 11
) AS b ON a.day = b.day;
GROUP BY day
) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11;
SELECT day = '2020-01-01'
FROM
(
SELECT day
FROM table_with_alias_column
FROM test_table
WHERE day = '2020-01-01'
GROUP BY day SETTINGS max_rows_to_read = 11
GROUP BY day
) AS a
INNER JOIN
(
SELECT toDate('2020-01-01') AS day
FROM numbers(1)
) AS b ON a.day = b.day;
) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11;
SELECT 'alias2alias';
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10;
SELECT t = '2020-01-03' FROM (SELECT day1 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT t = '2020-01-03' FROM (SELECT day2 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-03' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11;
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day1, day1 as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10;
DROP TABLE table_with_alias_column;
SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10;
SELECT t = '2020-01-03' FROM (SELECT day1 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT t = '2020-01-03' FROM (SELECT day2 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-03' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11;
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day1, day1 AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10;
SELECT 'second_index';
ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3];
ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array;
SELECT 'array-join';
set max_rows_to_read = 10;
SELECT count() == 10 FROM test_table WHERE day = '2020-01-01';
SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01');
SELECT 'lambda';
-- lambda parameters in filter should not be rewritten
SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03';
set max_rows_to_read = 0;
-- how to test it? currently just check logs, eg: 00940_order_by_read_in_order
SELECT 'optimize_read_in_order';
SET optimize_read_in_order = 1;
SELECT day AS s FROM test_table ORDER BY s LIMIT 1;
SELECT 'optimize_aggregation_in_order';
SET optimize_aggregation_in_order = 1;
SELECT day, count() AS s FROM test_table GROUP BY day;
SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp);
DROP TABLE test_table;
SELECT 'second-index';
DROP TABLE IF EXISTS test_index;
CREATE TABLE test_index
(
@ -84,6 +105,8 @@ PRIMARY KEY tuple()
ORDER BY key_string SETTINGS index_granularity = 1;
INSERT INTO test_index SELECT * FROM numbers(10);
SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1 SETTINGS max_rows_to_read = 10;
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1 SETTINGS max_rows_to_read = 10;
set max_rows_to_read = 1;
SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1;
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1;
DROP TABLE IF EXISTS test_index;