2019-12-10 23:18:24 +00:00
|
|
|
#include <Storages/ReadInOrderOptimizer.h>
|
2020-12-10 23:56:57 +00:00
|
|
|
|
2020-12-13 01:55:56 +00:00
|
|
|
#include <Interpreters/ExpressionActions.h>
|
|
|
|
#include <Interpreters/ExpressionAnalyzer.h>
|
|
|
|
#include <Interpreters/TreeRewriter.h>
|
|
|
|
#include <Interpreters/replaceAliasColumnsInQuery.h>
|
2020-12-10 23:56:57 +00:00
|
|
|
#include <Functions/IFunction.h>
|
|
|
|
#include <Interpreters/TableJoin.h>
|
2019-12-10 23:18:24 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
2020-03-19 11:34:54 +00:00
|
|
|
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
2019-12-10 23:18:24 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Stores the per-element expression actions and the required sort description,
/// and remembers which column names must not be analyzed.
///
/// Throws LOGICAL_ERROR if the number of actions differs from the number of
/// elements in the sort description: the two are indexed in parallel later on.
ReadInOrderOptimizer::ReadInOrderOptimizer(
    const ManyExpressionActions & elements_actions_,
    const SortDescription & required_sort_description_,
    const TreeRewriterResultPtr & syntax_result)
    : elements_actions(elements_actions_)
    , required_sort_description(required_sort_description_)
{
    const bool sizes_match = (elements_actions.size() == required_sort_description.size());
    if (!sizes_match)
    {
        throw Exception("Sizes of sort description and actions are mismatched", ErrorCodes::LOGICAL_ERROR);
    }

    /// Columns from this set are skipped when the sort description is matched
    /// against the sorting key. The set is the ARRAY JOIN source names reported
    /// by the syntax analysis result.
    /// NOTE(review): presumably such columns may have aliases and reach the sort
    /// description as is, so they could be confused with order key columns - confirm.
    forbidden_columns = syntax_result->getArrayJoinSourceNameSet();
}
|
|
|
|
|
2020-12-12 16:42:15 +00:00
|
|
|
/// Tries to express the required sort description through the sorting key of the storage.
///
/// The required sort description is walked element by element; element `i` is compared
/// with the `i`-th sorting key column. An element matches when it is either exactly that
/// column, or a single one-argument monotonic function applied to that column
/// (a non-positive monotonic function flips the direction). The first element decides
/// the read direction; every following element must agree with it.
/// The walk stops at the first element that cannot be matched, so the collected
/// description is always a contiguous prefix of the required sort description.
///
/// Returns an empty pointer when the storage has no sorting key, when nothing is required,
/// or when not even the first element matches. Otherwise returns an InputOrderInfo built
/// from the matched prefix and the read direction.
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr & metadata_snapshot, const Context & context) const
{
    if (!metadata_snapshot->hasSortingKey())
        return {};

    /// Nothing is required, hence there is no first element to take the direction from.
    if (required_sort_description.empty())
        return {};

    const Names sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
    const auto & columns = metadata_snapshot->getColumns();
    const auto alias_columns = columns.getAliases();

    SortDescription order_key_prefix_descr;
    int read_direction = required_sort_description[0].direction;

    const size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size());

    for (size_t i = 0; i < prefix_size; ++i)
    {
        const auto & required_element = required_sort_description[i];

        if (forbidden_columns.count(required_element.column_name))
            break;

        /// Optimize in case of exact match with order key element
        /// or in some simple cases when order key element is wrapped into monotonic function.
        /// Returns true if the i-th element can be read in order; for i == 0 it also
        /// updates read_direction.
        auto apply_order_judge = [&](const ExpressionActions::Actions & actions, const String & sort_column)
        {
            int current_direction = required_element.direction;

            /// For the path: order by (sort_column, ...)
            if (sort_column == sorting_key_columns[i] && current_direction == read_direction)
                return true;

            /// For the path: order by (function(sort_column), ...)
            /// Allow only one simple monotonic function with one argument.
            /// Why not allow multi monotonic functions?
            bool found_function = false;
            for (const auto & action : actions)
            {
                if (action.node->type != ActionsDAG::ActionType::FUNCTION)
                    continue;

                /// A second function in the chain is not supported.
                if (found_function)
                    return false;
                found_function = true;

                /// The function must take exactly the i-th sorting key column as its only argument.
                if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_columns[i])
                    return false;

                const auto & func = *action.node->function_base;
                if (!func.hasInformationAboutMonotonicity())
                    return false;

                auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
                if (!monotonicity.is_monotonic)
                    return false;

                /// A decreasing function reverses the order in which the key has to be read.
                if (!monotonicity.is_positive)
                    current_direction *= -1;
            }

            if (!found_function)
                return false;

            if (!current_direction || (i > 0 && current_direction != read_direction))
                return false;

            if (i == 0)
                read_direction = current_direction;

            return true;
        };

        bool matched = false;

        /// Check if it's an alias column.
        /// Currently we only support alias column without any function wrapper.
        if (context.getSettingsRef().optimize_alias_column_prediction && alias_columns.contains(required_element.column_name))
        {
            /// Expand the alias into its defining expression and analyze that expression instead.
            auto column_expr = columns.get(required_element.column_name).default_desc.expression->clone();
            replaceAliasColumnsInQuery(column_expr, columns, forbidden_columns, context);

            auto syntax_analyzer_result = TreeRewriter(context).analyze(column_expr, columns.getAll());
            const auto alias_expression = ExpressionAnalyzer(column_expr, syntax_analyzer_result, context).getActions(true);

            matched = apply_order_judge(alias_expression->getActions(), column_expr->getColumnName());
        }
        else
        {
            matched = apply_order_judge(elements_actions[i]->getActions(), required_element.column_name);
        }

        /// Only a contiguous prefix is usable: once an element does not match,
        /// the following ones must not be appended even if they match their own key column.
        if (!matched)
            break;

        order_key_prefix_descr.push_back(required_element);
    }

    if (order_key_prefix_descr.empty())
        return {};

    return std::make_shared<InputOrderInfo>(std::move(order_key_prefix_descr), read_direction);
}
|
|
|
|
|
|
|
|
}
|