mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
update
This commit is contained in:
parent
20f0d39645
commit
9407028540
@ -364,6 +364,7 @@ class IColumn;
|
||||
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
|
||||
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
|
||||
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
|
||||
M(Bool, optimize_alias_column_prediction, true, "If it is set to true, it will rewrite the filter query with aliased columns, this could help with partition prune and secondary indexes. And also help with optimize_aggregation_in_order and optimize_read_in_order", 0) \
|
||||
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
|
||||
M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
|
||||
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/ColumnAliasesVisitor.h>
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
#include <Interpreters/RequiredSourceColumnsVisitor.h>
|
||||
#include <Interpreters/addTypeConversionToAST.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||
@ -8,43 +9,90 @@
|
||||
#include <Parsers/ASTAlterQuery.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool ColumnAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
|
||||
{
|
||||
if (const auto * f = node->as<ASTFunction>())
|
||||
{
|
||||
/// "lambda" visit children itself.
|
||||
if (f->name == "lambda")
|
||||
return false;
|
||||
}
|
||||
|
||||
return !(node->as<ASTTableExpression>()
|
||||
|| node->as<ASTSubquery>()
|
||||
|| node->as<ASTArrayJoin>()
|
||||
|| node->as<ASTSelectQuery>()
|
||||
|| node->as<ASTSelectWithUnionQuery>());
|
||||
}
|
||||
|
||||
/// Dispatches a visited AST node to the matching handler.
///  - SELECT query: only the WHERE and PREWHERE expressions are rewritten; nothing else in the query is touched here.
///  - Function: forwarded to the ASTFunction overload (handles lambdas).
///  - Identifier: forwarded to the ASTIdentifier overload (performs the alias substitution).
/// Any other node type is left as is.
void ColumnAliasesMatcher::visit(ASTPtr & ast, Data & data)
{
    /// If it's a select query, only replace filters.
    if (auto * query = ast->as<ASTSelectQuery>())
    {
        if (query->where())
            Visitor(data).visit(query->refWhere());
        if (query->prewhere())
            Visitor(data).visit(query->refPrewhere());

        return;
    }

    if (auto * node = ast->as<ASTFunction>())
    {
        visit(*node, ast, data);
        return;
    }

    if (auto * node = ast->as<ASTIdentifier>())
    {
        visit(*node, ast, data);
        return;
    }
}
|
||||
|
||||
/// Handles function nodes. Only "lambda" needs special care: its formal parameters
/// must not be mistaken for table columns while the lambda body is rewritten.
void ColumnAliasesMatcher::visit(ASTFunction & node, ASTPtr & /*ast*/, Data & data)
{
    /// Do not add formal parameters of the lambda expression
    if (node.name == "lambda")
    {
        /// Remember only the names inserted by this lambda, so that names already
        /// masked by an enclosing lambda stay masked after we are done.
        Names local_aliases;
        for (const auto & name : RequiredSourceColumnsMatcher::extractNamesFromLambda(node))
        {
            if (data.private_aliases.insert(name).second)
                local_aliases.push_back(name);
        }

        /// Visit the second argument of the lambda with the local aliases masked.
        Visitor(data).visit(node.arguments->children[1]);

        for (const auto & name : local_aliases)
            data.private_aliases.erase(name);
    }
}
|
||||
|
||||
/// Replaces an identifier that names an ALIAS column with the alias expression.
/// Identifiers listed in `data.forbidden_columns` or currently masked through
/// `data.private_aliases` are left untouched.
void ColumnAliasesMatcher::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
    if (auto column_name = IdentifierSemantic::getColumnName(node))
    {
        if (data.forbidden_columns.count(*column_name) || data.private_aliases.count(*column_name))
            return;

        /// NOTE(review): presumably `get` expects the column to exist in the table;
        /// confirm that identifiers which are not table columns cannot reach this point.
        const auto & col = data.columns.get(*column_name);
        if (col.default_desc.kind == ColumnDefaultKind::Alias)
        {
            /// `ast` is overwritten here, so `node` must not be used past this line.
            ast = addTypeConversionToAST(col.default_desc.expression->clone(), col.type->getName(), data.columns.getAll(), data.context);

            /// Revisit the substituted expression to resolve aliases that refer to other aliases.
            Visitor(data).visit(ast);
        }
    }
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Aliases.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
|
||||
@ -10,6 +10,8 @@ namespace DB
|
||||
class IAST;
|
||||
using ASTPtr = std::shared_ptr<IAST>;
|
||||
class IDataType;
|
||||
class ASTFunction;
|
||||
class ASTIdentifier;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
|
||||
/// Visits AST node to rewrite alias columns in filter query
|
||||
@ -22,14 +24,24 @@ public:
|
||||
struct Data
|
||||
{
|
||||
const ColumnsDescription & columns;
|
||||
const NameSet & forbidden_columns;
|
||||
const Context & context;
|
||||
|
||||
Data(const ColumnsDescription & columns_)
|
||||
NameSet private_aliases;
|
||||
|
||||
Data(const ColumnsDescription & columns_, const NameSet & forbidden_columns_, const Context & context_)
|
||||
: columns(columns_)
|
||||
, forbidden_columns(forbidden_columns_)
|
||||
, context(context_)
|
||||
{}
|
||||
};
|
||||
|
||||
static void visit(ASTPtr & ast, Data & data);
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
|
||||
|
||||
private:
|
||||
static void visit(ASTIdentifier & node, ASTPtr & ast, Data & data);
|
||||
static void visit(ASTFunction & node, ASTPtr & ast, Data & data);
|
||||
};
|
||||
|
||||
using ColumnAliasesVisitor = ColumnAliasesMatcher::Visitor;
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include <Interpreters/InterpreterSetQuery.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Interpreters/convertFieldToType.h>
|
||||
#include <Interpreters/replaceAliasColumnsInFilter.h>
|
||||
#include <Interpreters/addTypeConversionToAST.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
@ -31,7 +30,7 @@
|
||||
#include <Interpreters/JoinSwitcher.h>
|
||||
#include <Interpreters/JoinedTables.h>
|
||||
#include <Interpreters/QueryAliasesVisitor.h>
|
||||
#include <Interpreters/ColumnAliasesVisitor.h>
|
||||
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
||||
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
@ -1183,9 +1182,10 @@ void InterpreterSelectQuery::executeFetchColumns(
|
||||
else // It's possible to optimize count() given only partition predicates
|
||||
{
|
||||
SelectQueryInfo temp_query_info;
|
||||
temp_query_info.query = replaceAliasColumnsInFilter(query_ptr->clone(), storage->getInMemoryMetadata().getColumns());
|
||||
temp_query_info.query = query_ptr;
|
||||
temp_query_info.syntax_analyzer_result = syntax_analyzer_result;
|
||||
temp_query_info.sets = query_analyzer->getPreparedSets();
|
||||
|
||||
num_rows = storage->totalRowsByPartitionPredicate(temp_query_info, *context);
|
||||
}
|
||||
if (num_rows)
|
||||
@ -1292,11 +1292,10 @@ void InterpreterSelectQuery::executeFetchColumns(
|
||||
if (is_alias)
|
||||
{
|
||||
auto column_decl = storage_columns.get(column);
|
||||
/// TODO: can make CAST only if the type is different (but requires SyntaxAnalyzer).
|
||||
column_expr = addTypeConversionToAST(column_default->expression->clone(), column_decl.type->getName());
|
||||
column_expr = column_default->expression->clone();
|
||||
|
||||
// recursive visit for alias to alias
|
||||
ColumnAliasesVisitor::Data data(storage_columns);
|
||||
ColumnAliasesVisitor(data).visit(column_expr);
|
||||
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), *context);
|
||||
column_expr = setAlias(column_expr, column);
|
||||
}
|
||||
else
|
||||
@ -1509,7 +1508,7 @@ void InterpreterSelectQuery::executeFetchColumns(
|
||||
getSortDescriptionFromGroupBy(query),
|
||||
query_info.syntax_analyzer_result);
|
||||
|
||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
|
||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, *context);
|
||||
}
|
||||
|
||||
StreamLocalLimits limits;
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <Interpreters/ExpressionActions.h> /// getSmallestColumn()
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Interpreters/TreeOptimizer.h>
|
||||
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -367,6 +368,7 @@ void collectJoinedColumns(TableJoin & analyzed_join, const ASTSelectQuery & sele
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
|
||||
{
|
||||
/// There can not be aggregate functions inside the WHERE and PREWHERE.
|
||||
@ -512,8 +514,8 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column)
|
||||
== partition_source_columns.end())
|
||||
{
|
||||
optimize_trivial_count = false;
|
||||
break;
|
||||
optimize_trivial_count = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -591,6 +593,13 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
required_source_columns.swap(source_columns);
|
||||
}
|
||||
|
||||
/// Returns the set of keys of `array_join_result_to_source`.
/// Callers in this change pass the result as the "forbidden columns" of the alias rewrite.
NameSet TreeRewriterResult::getArrayJoinSourceNameSet() const
{
    NameSet names;
    for (const auto & result_and_source : array_join_result_to_source)
    {
        names.insert(result_and_source.first);
    }
    return names;
}
|
||||
|
||||
TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
ASTPtr & query,
|
||||
@ -654,6 +663,12 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
result.analyzed_join->table_join);
|
||||
collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases);
|
||||
|
||||
/// Rewrite alias columns in the filters of the select query; this must be done after getArrayJoinedColumns.
|
||||
if (settings.optimize_alias_column_prediction && result.metadata_snapshot)
|
||||
{
|
||||
replaceAliasColumnsInQuery(query, result.metadata_snapshot->getColumns(), result.getArrayJoinSourceNameSet(), context);
|
||||
}
|
||||
|
||||
result.aggregates = getAggregates(query, *select_query);
|
||||
result.collectUsedColumns(query, true);
|
||||
result.ast_join = select_query->join();
|
||||
@ -702,7 +717,7 @@ TreeRewriterResultPtr TreeRewriter::analyze(
|
||||
else
|
||||
assertNoAggregates(query, "in wrong place");
|
||||
|
||||
result.collectUsedColumns(query, false);
|
||||
result.collectUsedColumns(query ,false);
|
||||
return std::make_shared<const TreeRewriterResult>(result);
|
||||
}
|
||||
|
||||
|
@ -71,6 +71,7 @@ struct TreeRewriterResult
|
||||
void collectSourceColumns(bool add_special);
|
||||
void collectUsedColumns(const ASTPtr & query, bool is_select);
|
||||
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
|
||||
NameSet getArrayJoinSourceNameSet() const;
|
||||
const Scalars & getScalars() const { return scalars; }
|
||||
};
|
||||
|
||||
|
@ -4,11 +4,20 @@
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTWithAlias.h>
|
||||
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/TreeRewriter.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int THERE_IS_NO_DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
|
||||
{
|
||||
auto func = makeASTFunction("CAST", ast, std::make_shared<ASTLiteral>(type_name));
|
||||
@ -23,4 +32,23 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name)
|
||||
return func;
|
||||
}
|
||||
|
||||
/// Returns `ast` wrapped into CAST(..., type_name) only when the type inferred for the
/// expression differs from `type_name`; otherwise the expression is returned as is.
/// The expression is analyzed against `all_columns` to infer its type.
/// Throws THERE_IS_NO_DEFAULT_VALUE if evaluating the expression would need an ARRAY JOIN action.
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context)
{
    auto analysis_result = TreeRewriter(context).analyze(ast, all_columns);
    const auto expression_actions = ExpressionAnalyzer(ast, analysis_result, context).getActions(true);

    for (const auto & step : expression_actions->getActions())
    {
        if (step.node->type == ActionsDAG::ActionType::ARRAY_JOIN)
            throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE);
    }

    auto sample_block = expression_actions->getSampleBlock();
    auto inferred_type = sample_block.getByName(ast->getColumnName()).type;

    /// Same type already: no CAST wrapper is needed.
    if (inferred_type->getName() == type_name)
        return std::move(ast);

    return addTypeConversionToAST(std::move(ast), type_name);
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -10,4 +11,7 @@ namespace DB
|
||||
/// It will produce an expression with CAST to get an AST with the required type.
|
||||
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name);
|
||||
|
||||
/// Wraps the expression into CAST only if its inferred type differs from type_name; otherwise returns it without the CAST wrapper.
|
||||
ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const NamesAndTypesList & all_columns, const Context & context);
|
||||
|
||||
}
|
||||
|
@ -1,22 +0,0 @@
|
||||
#include <Interpreters/replaceAliasColumnsInFilter.h>
|
||||
#include <Interpreters/ColumnAliasesVisitor.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns)
|
||||
{
|
||||
auto & temp_select = ast->as<ASTSelectQuery &>();
|
||||
ColumnAliasesVisitor::Data aliase_column_data(columns);
|
||||
ColumnAliasesVisitor aliase_column_visitor(aliase_column_data);
|
||||
if (temp_select.where())
|
||||
aliase_column_visitor.visit(temp_select.refWhere());
|
||||
if (temp_select.prewhere())
|
||||
aliase_column_visitor.visit(temp_select.refPrewhere());
|
||||
|
||||
return std::move(ast);
|
||||
}
|
||||
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ColumnsDescription;
|
||||
ASTPtr replaceAliasColumnsInFilter(ASTPtr && ast, const ColumnsDescription & columns);
|
||||
|
||||
}
|
16
src/Interpreters/replaceAliasColumnsInQuery.cpp
Normal file
16
src/Interpreters/replaceAliasColumnsInQuery.cpp
Normal file
@ -0,0 +1,16 @@
|
||||
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
||||
#include <Interpreters/ColumnAliasesVisitor.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Runs ColumnAliasesVisitor over `ast` in place, using `columns` to resolve alias
/// columns and leaving the names in `forbidden_columns` untouched.
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context)
{
    ColumnAliasesVisitor::Data visitor_data(columns, forbidden_columns, context);
    ColumnAliasesVisitor(visitor_data).visit(ast);
}
|
||||
|
||||
}
|
14
src/Interpreters/replaceAliasColumnsInQuery.h
Normal file
14
src/Interpreters/replaceAliasColumnsInQuery.h
Normal file
@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/types.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ColumnsDescription;
|
||||
class Context;
|
||||
void replaceAliasColumnsInQuery(ASTPtr & ast, const ColumnsDescription & columns, const NameSet & forbidden_columns, const Context & context);
|
||||
|
||||
}
|
@ -153,7 +153,7 @@ SRCS(
|
||||
interpretSubquery.cpp
|
||||
join_common.cpp
|
||||
loadMetadata.cpp
|
||||
replaceAliasColumnsInFilter.cpp
|
||||
replaceAliasColumnsInQuery.cpp
|
||||
sortBlock.cpp
|
||||
|
||||
)
|
||||
|
@ -50,6 +50,8 @@ public:
|
||||
ASTPtr & refPrewhere() { return getExpression(Expression::PREWHERE); }
|
||||
ASTPtr & refWhere() { return getExpression(Expression::WHERE); }
|
||||
ASTPtr & refHaving() { return getExpression(Expression::HAVING); }
|
||||
ASTPtr & refOrderBy() { return getExpression(Expression::ORDER_BY); }
|
||||
ASTPtr & refGroupBy() { return getExpression(Expression::GROUP_BY); }
|
||||
|
||||
const ASTPtr with() const { return getExpression(Expression::WITH); }
|
||||
const ASTPtr select() const { return getExpression(Expression::SELECT); }
|
||||
|
@ -22,8 +22,6 @@
|
||||
#include <Parsers/ASTSampleRatio.h>
|
||||
#include <Parsers/parseIdentifierOrStringLiteral.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/ColumnAliasesVisitor.h>
|
||||
#include <Interpreters/replaceAliasColumnsInFilter.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Processors/ConcatProcessor.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
@ -211,14 +209,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
const auto & primary_key = metadata_snapshot->getPrimaryKey();
|
||||
Names primary_key_columns = primary_key.column_names;
|
||||
|
||||
// query_info_for_index is a cloned SelectQueryInfo just for index
|
||||
SelectQueryInfo query_info_for_index = query_info;
|
||||
if (!metadata_snapshot->getColumns().getAliases().empty())
|
||||
{
|
||||
query_info_for_index.query = replaceAliasColumnsInFilter(query_info.query->clone(), metadata_snapshot->getColumns());
|
||||
}
|
||||
|
||||
KeyCondition key_condition(query_info_for_index, context, primary_key_columns, primary_key.expression);
|
||||
KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression);
|
||||
|
||||
if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
|
||||
{
|
||||
@ -230,8 +221,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
std::optional<PartitionPruner> partition_pruner;
|
||||
if (data.minmax_idx_expr)
|
||||
{
|
||||
minmax_idx_condition.emplace(query_info_for_index, context, data.minmax_idx_columns, data.minmax_idx_expr);
|
||||
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info_for_index, context, false /* strict */);
|
||||
minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr);
|
||||
partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */);
|
||||
|
||||
if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless()))
|
||||
{
|
||||
@ -295,6 +286,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
RelativeSize relative_sample_offset = 0;
|
||||
|
||||
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
||||
|
||||
auto select_sample_size = select.sampleSize();
|
||||
auto select_sample_offset = select.sampleOffset();
|
||||
|
||||
@ -567,7 +559,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
||||
for (const auto & index : metadata_snapshot->getSecondaryIndices())
|
||||
{
|
||||
auto index_helper = MergeTreeIndexFactory::instance().get(index);
|
||||
auto condition = index_helper->createIndexCondition(query_info_for_index, context);
|
||||
auto condition = index_helper->createIndexCondition(query_info, context);
|
||||
if (!condition->alwaysUnknownOrTrue())
|
||||
useful_indices.emplace_back(index_helper, condition);
|
||||
}
|
||||
|
@ -1,7 +1,9 @@
|
||||
#include <Storages/ReadInOrderOptimizer.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
namespace DB
|
||||
@ -26,11 +28,10 @@ ReadInOrderOptimizer::ReadInOrderOptimizer(
|
||||
/// Do not analyze joined columns.
|
||||
/// They may have aliases and come to description as is.
|
||||
/// We can mismatch them with order key columns at stage of fetching columns.
|
||||
for (const auto & elem : syntax_result->array_join_result_to_source)
|
||||
forbidden_columns.insert(elem.first);
|
||||
forbidden_columns = syntax_result->getArrayJoinSourceNameSet();
|
||||
}
|
||||
|
||||
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot) const
|
||||
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const
|
||||
{
|
||||
Names sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
|
||||
if (!metadata_snapshot->hasSortingKey())
|
||||
@ -40,6 +41,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
|
||||
int read_direction = required_sort_description.at(0).direction;
|
||||
|
||||
size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size());
|
||||
auto aliase_columns = metadata_snapshot->getColumns().getAliases();
|
||||
|
||||
for (size_t i = 0; i < prefix_size; ++i)
|
||||
{
|
||||
@ -48,60 +50,92 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
|
||||
|
||||
/// Optimize in case of exact match with order key element
|
||||
/// or in some simple cases when order key element is wrapped into monotonic function.
|
||||
int current_direction = required_sort_description[i].direction;
|
||||
if (required_sort_description[i].column_name == sorting_key_columns[i] && current_direction == read_direction)
|
||||
order_key_prefix_descr.push_back(required_sort_description[i]);
|
||||
else
|
||||
auto apply_order_judge = [&] (const ExpressionActions::Actions & actions, const String & sort_column)
|
||||
{
|
||||
/// Allow only one simple monotonic functions with one argument
|
||||
bool found_function = false;
|
||||
for (const auto & action : elements_actions[i]->getActions())
|
||||
int current_direction = required_sort_description[i].direction;
|
||||
/// For the path: order by (sort_column, ...)
|
||||
if (sort_column == sorting_key_columns[i] && current_direction == read_direction)
|
||||
{
|
||||
if (action.node->type != ActionsDAG::ActionType::FUNCTION)
|
||||
continue;
|
||||
|
||||
if (found_function)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
else
|
||||
found_function = true;
|
||||
|
||||
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i])
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
const auto & func = *action.node->function_base;
|
||||
if (!func.hasInformationAboutMonotonicity())
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
|
||||
if (!monotonicity.is_monotonic)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
else if (!monotonicity.is_positive)
|
||||
current_direction *= -1;
|
||||
return true;
|
||||
}
|
||||
/// For the path: order by (function(sort_column), ...)
|
||||
/// Allow only one simple monotonic functions with one argument
|
||||
/// Why not allow multi monotonic functions?
|
||||
else
|
||||
{
|
||||
bool found_function = false;
|
||||
|
||||
if (!found_function)
|
||||
current_direction = 0;
|
||||
for (const auto & action : actions)
|
||||
{
|
||||
if (action.node->type != ActionsDAG::ActionType::FUNCTION)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!current_direction || (i > 0 && current_direction != read_direction))
|
||||
break;
|
||||
if (found_function)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
else
|
||||
found_function = true;
|
||||
|
||||
if (i == 0)
|
||||
read_direction = current_direction;
|
||||
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i])
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
order_key_prefix_descr.push_back(required_sort_description[i]);
|
||||
const auto & func = *action.node->function_base;
|
||||
if (!func.hasInformationAboutMonotonicity())
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
|
||||
if (!monotonicity.is_monotonic)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
else if (!monotonicity.is_positive)
|
||||
current_direction *= -1;
|
||||
}
|
||||
|
||||
if (!found_function)
|
||||
current_direction = 0;
|
||||
|
||||
if (!current_direction || (i > 0 && current_direction != read_direction))
|
||||
return false;
|
||||
|
||||
if (i == 0)
|
||||
read_direction = current_direction;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
const auto & actions = elements_actions[i]->getActions();
|
||||
bool ok;
|
||||
/// check if it's alias column
|
||||
/// currently we only support alias column without any function wrapper
|
||||
if (context.getSettingsRef().optimize_alias_column_prediction && aliase_columns.contains(required_sort_description[i].column_name))
|
||||
{
|
||||
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
|
||||
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context);
|
||||
|
||||
auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, metadata_snapshot->getColumns().getAll());
|
||||
const auto expression_analyzer = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true);
|
||||
const auto & alias_actions = expression_analyzer->getActions();
|
||||
|
||||
ok = apply_order_judge(alias_actions, column_expr->getColumnName());
|
||||
}
|
||||
else
|
||||
ok = apply_order_judge(actions, required_sort_description[i].column_name);
|
||||
|
||||
if (ok)
|
||||
order_key_prefix_descr.push_back(required_sort_description[i]);
|
||||
}
|
||||
|
||||
if (order_key_prefix_descr.empty())
|
||||
|
@ -12,6 +12,8 @@ namespace DB
|
||||
* common prefix, which is needed for
|
||||
* performing reading in order of PK.
|
||||
*/
|
||||
class Context;
|
||||
|
||||
class ReadInOrderOptimizer
|
||||
{
|
||||
public:
|
||||
@ -20,7 +22,7 @@ public:
|
||||
const SortDescription & required_sort_description,
|
||||
const TreeRewriterResultPtr & syntax_result);
|
||||
|
||||
InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot) const;
|
||||
InputOrderInfoPtr getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const;
|
||||
|
||||
private:
|
||||
/// Actions for every element of order expression to analyze functions for monotonicity
|
||||
@ -28,5 +30,4 @@ private:
|
||||
NameSet forbidden_columns;
|
||||
SortDescription required_sort_description;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -195,7 +195,7 @@ void StorageBuffer::read(
|
||||
if (dst_has_same_structure)
|
||||
{
|
||||
if (query_info.order_optimizer)
|
||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot);
|
||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination_metadata_snapshot, context);
|
||||
|
||||
/// The destination table has the same structure of the requested columns and we can simply read blocks from there.
|
||||
destination->read(
|
||||
|
@ -136,7 +136,7 @@ void StorageMaterializedView::read(
|
||||
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
|
||||
|
||||
if (query_info.order_optimizer)
|
||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot);
|
||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
|
||||
|
||||
storage->read(query_plan, column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
|
||||
|
||||
|
@ -211,7 +211,7 @@ Pipe StorageMerge::read(
|
||||
{
|
||||
auto storage_ptr = std::get<0>(*it);
|
||||
auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
|
||||
auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot);
|
||||
auto current_info = query_info.order_optimizer->getInputOrder(storage_metadata_snapshot, context);
|
||||
if (it == selected_tables.begin())
|
||||
input_sorting_info = current_info;
|
||||
else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info))
|
||||
|
@ -15,6 +15,20 @@ alias2alias
|
||||
1
|
||||
1
|
||||
1
|
||||
second_index
|
||||
array-join
|
||||
1
|
||||
0 0
|
||||
lambda
|
||||
1
|
||||
optimize_read_in_order
|
||||
2020-01-01
|
||||
optimize_aggregation_in_order
|
||||
2020-01-01 10
|
||||
2020-01-02 10
|
||||
2020-01-03 10
|
||||
2020-01-01 10
|
||||
2020-01-02 10
|
||||
2020-01-03 10
|
||||
second-index
|
||||
1
|
||||
1
|
||||
|
@ -1,5 +1,5 @@
|
||||
DROP TABLE IF EXISTS table_with_alias_column;
|
||||
CREATE TABLE table_with_alias_column
|
||||
DROP TABLE IF EXISTS test_table;
|
||||
CREATE TABLE test_table
|
||||
(
|
||||
`timestamp` DateTime,
|
||||
`value` UInt64,
|
||||
@ -13,22 +13,21 @@ PARTITION BY toYYYYMMDD(timestamp)
|
||||
ORDER BY timestamp SETTINGS index_granularity = 1;
|
||||
|
||||
|
||||
INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10);
|
||||
|
||||
INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10);
|
||||
|
||||
INSERT INTO table_with_alias_column(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10);
|
||||
|
||||
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'), 1 FROM numbers(10);
|
||||
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10);
|
||||
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10);
|
||||
|
||||
set optimize_alias_column_prediction = 1;
|
||||
SELECT 'test-partition-prune';
|
||||
|
||||
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;
|
||||
SELECT t = '2020-01-03' FROM (SELECT day as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
|
||||
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day = '2020-01-01' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11;
|
||||
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day, day as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
|
||||
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;
|
||||
SELECT t = '2020-01-03' FROM (SELECT day AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
|
||||
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11;
|
||||
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day, day AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
|
||||
|
||||
|
||||
|
||||
SELECT 'test-join';
|
||||
|
||||
SELECT day = '2020-01-03'
|
||||
FROM
|
||||
(
|
||||
@ -38,39 +37,61 @@ FROM
|
||||
INNER JOIN
|
||||
(
|
||||
SELECT day
|
||||
FROM table_with_alias_column
|
||||
FROM test_table
|
||||
WHERE day = '2020-01-03'
|
||||
GROUP BY day SETTINGS max_rows_to_read = 11
|
||||
) AS b ON a.day = b.day;
|
||||
GROUP BY day
|
||||
) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11;
|
||||
|
||||
SELECT day = '2020-01-01'
|
||||
FROM
|
||||
(
|
||||
SELECT day
|
||||
FROM table_with_alias_column
|
||||
FROM test_table
|
||||
WHERE day = '2020-01-01'
|
||||
GROUP BY day SETTINGS max_rows_to_read = 11
|
||||
GROUP BY day
|
||||
) AS a
|
||||
INNER JOIN
|
||||
(
|
||||
SELECT toDate('2020-01-01') AS day
|
||||
FROM numbers(1)
|
||||
) AS b ON a.day = b.day;
|
||||
) AS b ON a.day = b.day SETTINGS max_rows_to_read = 11;
|
||||
|
||||
|
||||
SELECT 'alias2alias';
|
||||
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10;
|
||||
SELECT t = '2020-01-03' FROM (SELECT day1 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
|
||||
SELECT t = '2020-01-03' FROM (SELECT day2 as t FROM table_with_alias_column WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
|
||||
SELECT COUNT() = 10 FROM table_with_alias_column WHERE day1 = '2020-01-03' UNION ALL select 1 from numbers(1) SETTINGS max_rows_to_read = 11;
|
||||
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') as day1, day1 as t FROM table_with_alias_column PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
|
||||
SELECT day1 = '2020-01-04' FROM table_with_alias_column PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10;
|
||||
|
||||
DROP TABLE table_with_alias_column;
|
||||
SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-02' SETTINGS max_rows_to_read = 10;
|
||||
SELECT t = '2020-01-03' FROM (SELECT day1 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
|
||||
SELECT t = '2020-01-03' FROM (SELECT day2 AS t FROM test_table WHERE t = '2020-01-03' GROUP BY t SETTINGS max_rows_to_read = 10);
|
||||
SELECT COUNT() = 10 FROM test_table WHERE day1 = '2020-01-03' UNION ALL SELECT 1 FROM numbers(1) SETTINGS max_rows_to_read = 11;
|
||||
SELECT COUNT() = 0 FROM (SELECT toDate('2019-01-01') AS day1, day1 AS t FROM test_table PREWHERE t = '2020-01-03' WHERE t = '2020-01-03' GROUP BY t );
|
||||
SELECT day1 = '2020-01-04' FROM test_table PREWHERE day1 = '2020-01-04' WHERE day1 = '2020-01-04' GROUP BY day1 SETTINGS max_rows_to_read = 10;
|
||||
|
||||
|
||||
SELECT 'second_index';
|
||||
ALTER TABLE test_table add column array Array(UInt8) default [1, 2, 3];
|
||||
ALTER TABLE test_table add column struct.key Array(UInt8) default [2, 4, 6], add column struct.value Array(UInt8) alias array;
|
||||
|
||||
SELECT 'array-join';
|
||||
set max_rows_to_read = 10;
|
||||
SELECT count() == 10 FROM test_table WHERE day = '2020-01-01';
|
||||
SELECT sum(struct.key) == 30, sum(struct.value) == 30 FROM (SELECT struct.key, struct.value FROM test_table array join struct WHERE day = '2020-01-01');
|
||||
|
||||
SELECT 'lambda';
|
||||
-- lambda parameters in the filter should not be rewritten
|
||||
SELECT count() == 10 FROM test_table WHERE arrayMap((day) -> day + 1, [1,2,3]) [1] = 2 AND day = '2020-01-03';
|
||||
|
||||
set max_rows_to_read = 0;
|
||||
-- How to test it? Currently we just check the logs, e.g. as in 00940_order_by_read_in_order
|
||||
SELECT 'optimize_read_in_order';
|
||||
SET optimize_read_in_order = 1;
|
||||
SELECT day AS s FROM test_table ORDER BY s LIMIT 1;
|
||||
|
||||
SELECT 'optimize_aggregation_in_order';
|
||||
SET optimize_aggregation_in_order = 1;
|
||||
SELECT day, count() AS s FROM test_table GROUP BY day;
|
||||
SELECT toDate(timestamp), count() AS s FROM test_table GROUP BY toDate(timestamp);
|
||||
|
||||
DROP TABLE test_table;
|
||||
|
||||
SELECT 'second-index';
|
||||
DROP TABLE IF EXISTS test_index;
|
||||
CREATE TABLE test_index
|
||||
(
|
||||
@ -84,6 +105,8 @@ PRIMARY KEY tuple()
|
||||
ORDER BY key_string SETTINGS index_granularity = 1;
|
||||
|
||||
INSERT INTO test_index SELECT * FROM numbers(10);
|
||||
SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1 SETTINGS max_rows_to_read = 10;
|
||||
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1 SETTINGS max_rows_to_read = 10;
|
||||
set max_rows_to_read = 1;
|
||||
SELECT COUNT() == 1 FROM test_index WHERE key_uint32 = 1;
|
||||
SELECT COUNT() == 1 FROM test_index WHERE toUInt32(key_string) = 1;
|
||||
DROP TABLE IF EXISTS test_index;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user