better support of read_in_order in case of fixed prefix of sorting key

This commit is contained in:
Anton Popov 2022-07-01 16:43:40 +00:00
parent 60dcae9af0
commit ef87e1207c
9 changed files with 238 additions and 60 deletions

View File

@ -2481,7 +2481,7 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input
auto finish_sorting_step = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(),
input_sorting_info->order_key_prefix_descr,
input_sorting_info->sort_description_for_merging,
output_order_descr,
settings.max_block_size,
limit);

View File

@ -104,7 +104,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
if (order_info)
{
read_from_merge_tree->setQueryInfoInputOrderInfo(order_info);
sorting->convertToFinishSorting(order_info->order_key_prefix_descr);
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
}
return 0;

View File

@ -520,9 +520,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
if (need_preliminary_merge)
{
size_t fixed_prefix_size = input_order_info->order_key_fixed_prefix_descr.size();
size_t prefix_size = fixed_prefix_size + input_order_info->order_key_prefix_descr.size();
size_t prefix_size = input_order_info->used_prefix_of_sorting_key_size;
auto order_key_prefix_ast = metadata_for_reading->getSortingKey().expression_list_ast->clone();
order_key_prefix_ast->children.resize(prefix_size);

View File

@ -41,13 +41,13 @@ AggregatingInOrderTransform::AggregatingInOrderTransform(
/// We won't finalize states in order to merge same states (generated due to multi-thread execution) in AggregatingSortedTransform
res_header = params->getCustomHeader(/* final_= */ false);
for (size_t i = 0; i < group_by_info->order_key_prefix_descr.size(); ++i)
for (size_t i = 0; i < group_by_info->sort_description_for_merging.size(); ++i)
{
const auto & column_description = group_by_description_[i];
group_by_description.emplace_back(column_description, res_header.getPositionByName(column_description.column_name));
}
if (group_by_info->order_key_prefix_descr.size() < group_by_description_.size())
if (group_by_info->sort_description_for_merging.size() < group_by_description_.size())
{
group_by_key = true;
/// group_by_description may contain duplicates, so we use keys_size from Aggregator::params

View File

@ -5,9 +5,11 @@
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/TableJoin.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
namespace DB
{
@ -20,26 +22,43 @@ namespace ErrorCodes
namespace
{
ASTPtr getFixedPoint(const ASTPtr & ast)
ASTPtr getFixedPoint(const ASTPtr & ast, const ContextPtr & context)
{
const auto * func = ast->as<ASTFunction>();
if (!func || func->name != "equals")
return nullptr;
if (!func->arguments || func->arguments->children.size() != 2)
return nullptr;
const auto & lhs = func->arguments->children[0];
const auto & rhs = func->arguments->children[1];
if (lhs->as<ASTLiteral>())
return rhs;
if (!lhs->as<ASTLiteral>() && !rhs->as<ASTLiteral>())
return nullptr;
if (rhs->as<ASTLiteral>())
return lhs;
/// Case of two literals doesn't make sense.
if (lhs->as<ASTLiteral>() && rhs->as<ASTLiteral>())
return nullptr;
return nullptr;
auto argument = lhs->as<ASTLiteral>() ? rhs : lhs;
/// Peel off single-argument injective functions wrapped around the non-literal
/// operand (e.g. `f(g(x)) = literal`) until a non-function node is reached.
while (const auto * arg_func = argument->as<ASTFunction>())
{
    /// Only unary functions are unwrapped.
    if (!arg_func->arguments || arg_func->arguments->children.size() != 1)
        return nullptr;
    /// The function must be known to the factory and reported as injective.
    auto func_resolver = FunctionFactory::instance().tryGet(arg_func->name, context);
    if (!func_resolver || !func_resolver->isInjective({}))
        return nullptr;
    /// Descend into the wrapped function's own argument. Taking the first child of the
    /// outer `equals` node (`func`) instead would reset `argument` to the left operand
    /// on every iteration, so the loop would never advance when that operand is a function.
    argument = arg_func->arguments->children[0];
}
return argument->as<ASTIdentifier>() ? argument : nullptr;
}
size_t calculateFixedPrefixSize(
const ASTSelectQuery & query, const Names & sorting_key_columns)
NameSet getFixedSortingColumns(
const ASTSelectQuery & query, const Names & sorting_key_columns, const ContextPtr & context)
{
ASTPtr condition;
if (query.where() && query.prewhere())
@ -50,14 +69,15 @@ size_t calculateFixedPrefixSize(
condition = query.prewhere();
if (!condition)
return 0;
return {};
/// Convert condition to CNF for more convenient analysis.
auto cnf = TreeCNFConverter::tryConvertToCNF(condition);
if (!cnf)
return 0;
return {};
NameSet fixed_points;
NameSet sorting_key_columns_set(sorting_key_columns.begin(), sorting_key_columns.end());
/// If we meet an expression like 'column = x', where 'x' is a literal,
/// in a clause of size 1 in the CNF, then we can guarantee
@ -66,22 +86,17 @@ size_t calculateFixedPrefixSize(
{
if (group.size() == 1 && !group.begin()->negative)
{
auto fixed_point = getFixedPoint(group.begin()->ast);
auto fixed_point = getFixedPoint(group.begin()->ast, context);
if (fixed_point)
fixed_points.insert(fixed_point->getColumnName());
{
auto column_name = fixed_point->getColumnName();
if (sorting_key_columns_set.contains(column_name))
fixed_points.insert(column_name);
}
}
});
size_t prefix_size = 0;
for (const auto & column_name : sorting_key_columns)
{
if (!fixed_points.contains(column_name))
break;
++prefix_size;
}
return prefix_size;
return fixed_points;
}
/// Optimize in case of exact match with order key element
@ -180,46 +195,52 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
const StorageMetadataPtr & metadata_snapshot,
const SortDescription & description,
const ManyExpressionActions & actions,
const ContextPtr & context,
UInt64 limit) const
{
auto sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
int read_direction = description.at(0).direction;
size_t fixed_prefix_size = calculateFixedPrefixSize(query, sorting_key_columns);
size_t descr_prefix_size = std::min(description.size(), sorting_key_columns.size() - fixed_prefix_size);
auto fixed_sorting_columns = getFixedSortingColumns(query, sorting_key_columns, context);
SortDescription order_key_prefix_descr;
order_key_prefix_descr.reserve(descr_prefix_size);
SortDescription sort_description_for_merging;
sort_description_for_merging.reserve(description.size());
for (size_t i = 0; i < descr_prefix_size; ++i)
size_t desc_pos = 0;
size_t key_pos = 0;
while (desc_pos < description.size() && key_pos < sorting_key_columns.size())
{
if (forbidden_columns.contains(description[i].column_name))
if (forbidden_columns.contains(description[desc_pos].column_name))
break;
int current_direction = matchSortDescriptionAndKey(
actions[i]->getActions(), description[i], sorting_key_columns[i + fixed_prefix_size]);
int current_direction = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
bool is_matched = current_direction && (desc_pos == 0 || current_direction == read_direction);
if (!is_matched)
{
if (fixed_sorting_columns.contains(sorting_key_columns[key_pos]))
{
++key_pos;
continue;
}
if (!current_direction || (i > 0 && current_direction != read_direction))
break;
}
if (i == 0)
if (desc_pos == 0)
read_direction = current_direction;
order_key_prefix_descr.push_back(required_sort_description[i]);
sort_description_for_merging.push_back(description[desc_pos]);
++desc_pos;
++key_pos;
}
if (order_key_prefix_descr.empty())
if (sort_description_for_merging.empty())
return {};
SortDescription order_key_fixed_prefix_descr;
order_key_fixed_prefix_descr.reserve(fixed_prefix_size);
for (size_t i = 0; i < fixed_prefix_size; ++i)
order_key_fixed_prefix_descr.emplace_back(sorting_key_columns[i], read_direction);
return std::make_shared<InputOrderInfo>(
std::move(order_key_fixed_prefix_descr),
std::move(order_key_prefix_descr),
read_direction, limit);
return std::make_shared<InputOrderInfo>(std::move(sort_description_for_merging), key_pos, read_direction, limit);
}
InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(
@ -254,10 +275,10 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(
aliases_actions[i] = expression_analyzer.getActions(true);
}
return getInputOrderImpl(metadata_snapshot, aliases_sort_description, aliases_actions, limit);
return getInputOrderImpl(metadata_snapshot, aliases_sort_description, aliases_actions, context, limit);
}
return getInputOrderImpl(metadata_snapshot, required_sort_description, elements_actions, limit);
return getInputOrderImpl(metadata_snapshot, required_sort_description, elements_actions, context, limit);
}
}

View File

@ -12,8 +12,6 @@ namespace DB
* common prefix, which is needed for
* performing reading in order of PK.
*/
class Context;
class ReadInOrderOptimizer
{
public:
@ -30,6 +28,7 @@ private:
const StorageMetadataPtr & metadata_snapshot,
const SortDescription & description,
const ManyExpressionActions & actions,
const ContextPtr & context,
UInt64 limit) const;
/// Actions for every element of order expression to analyze functions for monotonicity

View File

@ -87,17 +87,18 @@ struct FilterDAGInfo
struct InputOrderInfo
{
SortDescription order_key_fixed_prefix_descr;
SortDescription order_key_prefix_descr;
SortDescription sort_description_for_merging;
size_t used_prefix_of_sorting_key_size;
int direction;
UInt64 limit;
InputOrderInfo(
const SortDescription & order_key_fixed_prefix_descr_,
const SortDescription & order_key_prefix_descr_,
const SortDescription & sort_description_for_merging_,
size_t used_prefix_of_sorting_key_size_,
int direction_, UInt64 limit_)
: order_key_fixed_prefix_descr(order_key_fixed_prefix_descr_)
, order_key_prefix_descr(order_key_prefix_descr_)
: sort_description_for_merging(sort_description_for_merging_)
, used_prefix_of_sorting_key_size(used_prefix_of_sorting_key_size_)
, direction(direction_), limit(limit_)
{
}

View File

@ -0,0 +1,78 @@
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a, b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a, b, c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY a, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE b = 1 ORDER BY a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE b = 1 ORDER BY a, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE b = 1 ORDER BY b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY a, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, b, c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY b, a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY b, a, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY b, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY b, c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, b, c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY b, a
OK
SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY c, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY c, d
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY b, c
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 OR b = 1 ORDER BY a, b
OK
SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 OR b = 1 ORDER BY b
OK

View File

@ -0,0 +1,81 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Prints the query, then runs EXPLAIN PIPELINE for it with
# max_threads = 8 and optimize_read_in_order = 1.
# Arguments:
#   $1 - the query text to explain.
# Returns:
#   0 when the EXPLAIN output contains no "MergeSorting" line, non-zero otherwise.
function check_if_optimzed()
{
    # `local` keeps the variable from leaking into the caller's scope.
    local query="$1"
    # Quoted so the query is printed verbatim, without word splitting or globbing.
    echo "$query"
    # `!` inverts the pipeline status: grep -q succeeds when "MergeSorting" is present.
    ! $CLICKHOUSE_CLIENT --max_threads 8 --optimize_read_in_order 1 -q "EXPLAIN PIPELINE $query" | grep -q "MergeSorting"
}
# Prints "OK" when check_if_optimzed succeeds for the query in $1, "FAIL" otherwise.
# Arguments:
#   $1 - the query text to check.
function assert_optimized()
{
    # Explicit if/else: with `A && B || C`, C would also run if B itself failed.
    if check_if_optimzed "$1"; then
        echo "OK"
    else
        echo "FAIL"
    fi
}
# Prints "OK" when check_if_optimzed fails for the query in $1, "FAIL" otherwise.
# Arguments:
#   $1 - the query text to check.
function assert_not_optimized()
{
    # Explicit if/else with swapped branches replaces `! A && B || C`,
    # where C would also run if B itself failed.
    if check_if_optimzed "$1"; then
        echo "FAIL"
    else
        echo "OK"
    fi
}
# Recreate the test table from scratch; its sorting key is (a, b, c, d).
drop_query="DROP TABLE IF EXISTS t_fixed_prefix"
$CLICKHOUSE_CLIENT -q "$drop_query"
create_query="
CREATE TABLE t_fixed_prefix (a UInt32, b UInt32, c UInt32, d UInt32, e UInt32)
ENGINE = MergeTree ORDER BY (a, b, c, d)"
$CLICKHOUSE_CLIENT -q "$create_query"
# Merges are stopped before any data is written, then the same INSERT is issued twice
# (presumably so that the two inserts stay as separate parts — confirm).
$CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES t_fixed_prefix"
insert_query="INSERT INTO t_fixed_prefix SELECT number % 2, number % 10, number % 100, number % 1000, number FROM numbers(100000)"
for _ in 1 2; do
    $CLICKHOUSE_CLIENT -q "$insert_query"
done
# Each call below prints the query followed by OK or FAIL, so the order of these
# statements determines the order of the script's output.

# No WHERE clause; ORDER BY lists leading columns of the sorting key (a, b, c, d).
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a, b"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a, b, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY a, b, c, d"
# Equality filter on a single sorting-key column (a, b or c).
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY a, b"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY b"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY b, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE b = 1 ORDER BY a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE b = 1 ORDER BY a, c"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE b = 1 ORDER BY b, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY a, b"
# Equality filters on both a and b.
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, b"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, b, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY a, b, c, d"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY b, a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY b, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY b, a, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND b = 1 ORDER BY c, d"
# Equality filters on both a and c.
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, b"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY b, d"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, b, c"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY b, c, d"
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 AND c = 1 ORDER BY a, b, c, d"
# Queries asserted as not optimized: ORDER BY starts at a later sorting-key column
# with at most one equality filter (or none) in WHERE.
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY b"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY b, a"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix ORDER BY b, c"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY c"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY c, b"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 ORDER BY c, d"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY c, d"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE c = 1 ORDER BY b, c"
# Equalities combined with OR instead of AND.
assert_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 OR b = 1 ORDER BY a, b"
assert_not_optimized "SELECT a, b, c, d, e FROM t_fixed_prefix WHERE a = 1 OR b = 1 ORDER BY b"