Analyze ActionsDAG in ExpressionStep

+ check if sorting order can be kept from previous step
+ EXPLAIN PLAN header=1 contains Sort Mode and Sort Description for each step
+ some tests
+ fix some review comments
This commit is contained in:
Igor Nikonov 2022-08-04 19:30:25 +00:00
parent 8eafca65c9
commit 0a659f5ab8
10 changed files with 224 additions and 29 deletions

View File

@ -46,8 +46,10 @@ public:
int compare(const char * str1, size_t length1, const char * str2, size_t length2) const;
const std::string & getLocale() const;
private:
/// Collators compare equal exactly when they report the same locale string.
bool operator==(const Collator & other) const
{
    return getLocale() == other.getLocale();
}
private:
std::string locale;
UCollator * collator;
};

View File

@ -67,7 +67,7 @@ struct SortColumnDescription
/// Compares two optional collators.
/// When both pointers are set, the pointed-to collators are compared with Collator::operator==.
/// Otherwise the pointers themselves are compared, so two null pointers are equal and a null
/// pointer is never equal to a non-null one.
static bool compareCollators(const std::shared_ptr<Collator> & a, const std::shared_ptr<Collator> & b)
{
    if (unlikely(a && b))
        return *a == *b;

    /// At least one side is null here, so neither pointer may be dereferenced.
    return a == b;
}

View File

@ -11,6 +11,7 @@
#include <Interpreters/Context.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Core/SortDescription.h>
#include <stack>
#include <base/sort.h>
@ -1923,4 +1924,89 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
return actions;
}
/// Checks whether the order described by `sort_description` survives this DAG.
///
/// Only sort columns that are found in the index as INPUT nodes are examined; every other sort
/// column is skipped. For an examined column the order is reported as preserved only if
///  - at least one other node of the DAG (transitively) consumes the column, and
///  - every FUNCTION node among those consumers reports itself as always monotonic.
/// Returns true when no examined column violates these conditions (so also for an empty description).
///
/// Each node is tested for reachability of the column separately, so the cost is quadratic in the
/// number of nodes per examined sort column.
bool ActionsDAG::isSortingPreserved(const SortDescription & sort_description) const
{
    /// True if `target` can be reached from `start` by following one or more `children` links.
    /// The visited set is local to a single call, so one start node cannot hide the target
    /// from another start node.
    auto depends_on = [](const Node * start, const Node * target) -> bool
    {
        std::unordered_set<const Node *> visited;
        std::stack<const Node *> pending;
        pending.push(start);

        while (!pending.empty())
        {
            const auto * node = pending.top();
            pending.pop();

            for (const auto * child : node->children)
            {
                if (child == target)
                    return true;
                if (visited.insert(child).second)
                    pending.push(child);
            }
        }
        return false;
    };

    /// Non-function nodes and function nodes without `function_base` are not checked.
    auto keeps_order = [](const Node & node) -> bool
    {
        if (node.type != ActionType::FUNCTION || !node.function_base)
            return true;

        const auto & func = node.function_base;
        if (!func->hasInformationAboutMonotonicity())
            return false;

        const auto & types = func->getArgumentTypes();
        if (types.empty())
            return false;

        /// Default-constructed fields are passed as both range bounds.
        /// NOTE(review): presumably this asks about the whole value range - confirm against IFunctionBase.
        const Field field{};
        const auto monotonicity = func->getMonotonicityForRange(*types.front(), field, field);
        return monotonicity.is_always_monotonic;
    };

    /// Every node of the DAG is considered in turn, so all consumers of `column` are inspected
    /// and nodes that do not depend on it are ignored.
    auto node_preserve_sorting = [&](const Node * column) -> bool
    {
        bool column_found = false;
        for (const auto & candidate : nodes)
        {
            if (&candidate == column || !depends_on(&candidate, column))
                continue;

            column_found = true;
            if (!keeps_order(candidate))
                return false;
        }
        return column_found;
    };

    for (const auto & column_sort_desc : sort_description)
    {
        const auto * node = tryFindInIndex(column_sort_desc.column_name);
        if (node && node->type == ActionsDAG::ActionType::INPUT)
        {
            if (!node_preserve_sorting(node))
                return false;
        }
    }
    return true;
}
}

View File

@ -33,6 +33,8 @@ namespace JSONBuilder
using ItemPtr = std::unique_ptr<IItem>;
}
class SortDescription;
/// Directed acyclic graph of expressions.
/// This is an intermediate representation of actions which is usually built from expression list AST.
/// Node of DAG describe calculation of a single column with known type, name, and constant value (if applicable).
@ -73,7 +75,7 @@ public:
DataTypePtr result_type;
FunctionOverloadResolverPtr function_builder;
/// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity.
/// Can be used to get function signature or properties like monotonicity.
FunctionBasePtr function_base;
/// Prepared function which is used in function execution.
ExecutableFunctionPtr function;
@ -301,6 +303,8 @@ public:
const Names & available_inputs,
const ColumnsWithTypeAndName & all_inputs);
bool isSortingPreserved(const SortDescription & sort_description) const;
private:
Node & addNode(Node node);

View File

@ -10,34 +10,18 @@
namespace DB
{
/// Builds the traits of an expression step from its actions DAG.
///
/// Distinct columns, the sort order and the number of rows are reported as preserved only when
/// the DAG contains no ARRAY JOIN; the sort order additionally requires
/// ActionsDAG::isSortingPreserved() to accept `input_sort_desc`.
/// The number of streams is always reported as preserved, and the step never claims to return a single stream.
static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions, const SortDescription & input_sort_desc)
{
    const bool has_array_join = actions->hasArrayJoin();

    return ITransformingStep::Traits{
        {
            .preserves_distinct_columns = !has_array_join,
            .returns_single_stream = false,
            .preserves_number_of_streams = true,
            /// isSortingPreserved() is evaluated only when there is no ARRAY JOIN (short-circuit).
            .preserves_sorting = !has_array_join && actions->isSortingPreserved(input_sort_desc),
        },
        {
            .preserves_number_of_rows = !has_array_join,
        }};
}
ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDAGPtr & actions_dag_)

View File

@ -325,8 +325,19 @@ static void explainStep(
elem.dumpNameAndType(settings.out);
}
}
settings.out.write('\n');
if (step.hasOutputStream() && step.getOutputStream().header)
{
settings.out << prefix << "Sort Mode: " << step.getOutputStream().sort_mode;
if (step.getOutputStream().sort_mode != DataStream::SortMode::None)
{
settings.out << " ( ";
dumpSortDescription(step.getOutputStream().sort_description, settings.out);
settings.out << " )";
}
settings.out.write('\n');
}
}
if (options.actions)

View File

@ -196,7 +196,7 @@ void SortingStep::mergeSorting(QueryPipelineBuilder & pipeline, const SortDescri
void SortingStep::fullSort(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, const UInt64 limit_, const bool skip_partial_sort)
{
if (!skip_partial_sort)
if (!skip_partial_sort || limit_)
{
pipeline.addSimpleTransform(
[&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr
@ -287,7 +287,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
if (input_sort_desc.hasPrefix(result_description))
{
LOG_DEBUG(getLogger(), "Almost FullSort");
fullSort(pipeline, result_description, limit, true);
const bool skip_partial_sort = true;
fullSort(pipeline, result_description, limit, skip_partial_sort);
return;
}
}

View File

@ -65,7 +65,7 @@ private:
QueryPipelineBuilder & pipeline,
const SortDescription & result_sort_desc,
UInt64 limit_,
bool skip_partial_sort = false);
bool skip_partial_sort = false); /// if chunks already sorted according to result_sort_desc, then skip chunk sorting
enum class Type
{

View File

@ -1,4 +1,73 @@
-- { echoOn }
SELECT a from optimize_sorting order by a;
0
1
2
3
4
5
6
7
8
9
-- queries with unary function in order by
SELECT a from optimize_sorting order by -a;
9
8
7
6
5
4
3
2
1
0
SELECT a from optimize_sorting order by toFloat64(a);
0
1
2
3
4
5
6
7
8
9
-- queries with non-unary function in order by
SELECT a from optimize_sorting order by a+1;
0
1
2
3
4
5
6
7
8
9
SELECT a from optimize_sorting order by a-1;
0
1
2
3
4
5
6
7
8
9
SELECT a from optimize_sorting order by sipHash64(a, 'a');
3
8
0
9
5
1
2
6
4
7
-- queries with aliases
SELECT a as a from optimize_sorting order by a;
0
1
@ -10,6 +79,28 @@ SELECT a as a from optimize_sorting order by a;
7
8
9
SELECT a+1 as a from optimize_sorting order by a;
1
2
3
4
5
6
7
8
9
10
SELECT toFloat64(a) as a from optimize_sorting order by a;
0
1
2
3
4
5
6
7
8
9
SELECT sipHash64(a) as a from optimize_sorting order by a;
9140302661501632497
9199082625845137542

View File

@ -1,8 +1,24 @@
set optimize_sorting_for_input_stream=1;
DROP TABLE IF EXISTS optimize_sorting;
CREATE TABLE optimize_sorting (a UInt64, b UInt64, c UInt64) ENGINE MergeTree() ORDER BY (a, b);
INSERT INTO optimize_sorting SELECT number, number%5, number%2 from numbers(10);
INSERT INTO optimize_sorting SELECT number, number%5, number%2 from numbers(0, 5);
INSERT INTO optimize_sorting SELECT number, number%5, number%2 from numbers(5, 5);
-- { echoOn }
SELECT a from optimize_sorting order by a;
-- queries with unary function in order by
SELECT a from optimize_sorting order by -a;
SELECT a from optimize_sorting order by toFloat64(a);
-- queries with non-unary function in order by
SELECT a from optimize_sorting order by a+1;
SELECT a from optimize_sorting order by a-1;
SELECT a from optimize_sorting order by sipHash64(a, 'a');
-- queries with aliases
SELECT a as a from optimize_sorting order by a;
SELECT a+1 as a from optimize_sorting order by a;
SELECT toFloat64(a) as a from optimize_sorting order by a;
SELECT sipHash64(a) as a from optimize_sorting order by a;
-- queries with filter+expression
-- SELECT a FROM (SELECT a FROM optimize_sorting) WHERE a != 0 ORDER BY a;
-- SELECT a FROM (SELECT sipHash64(a) AS a FROM optimize_sorting) WHERE a != 0 ORDER BY a;
-- { echoOff }
DROP TABLE IF EXISTS optimize_sorting;
-- DROP TABLE IF EXISTS optimize_sorting;