mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Analyze ActionsDAG in ExpressionStep
+ check if sorting order can be kept from previous step + EXPLAIN PLAN header=1 contains Sort Mode and Sort Description for each step + some tests + fix some review comments
This commit is contained in:
parent
8eafca65c9
commit
0a659f5ab8
@ -46,8 +46,10 @@ public:
|
||||
int compare(const char * str1, size_t length1, const char * str2, size_t length2) const;
|
||||
|
||||
const std::string & getLocale() const;
|
||||
private:
|
||||
|
||||
bool operator==(const Collator & other) const { return this->getLocale() == other.getLocale(); }
|
||||
|
||||
private:
|
||||
std::string locale;
|
||||
UCollator * collator;
|
||||
};
|
||||
|
@ -67,7 +67,7 @@ struct SortColumnDescription
|
||||
static bool compareCollators(const std::shared_ptr<Collator> & a, const std::shared_ptr<Collator> & b)
|
||||
{
|
||||
if (unlikely(a && b))
|
||||
return a->getLocale() == b->getLocale();
|
||||
return *a == *b;
|
||||
|
||||
return a == b;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Core/SortDescription.h>
|
||||
|
||||
#include <stack>
|
||||
#include <base/sort.h>
|
||||
@ -1923,4 +1924,89 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
|
||||
return actions;
|
||||
}
|
||||
|
||||
/// Decides whether applying this DAG keeps the order described by `sort_description`.
/// For every sort column that resolves (via tryFindInIndex) to an INPUT node, the DAG is traversed
/// from every other node down through `children`; FUNCTION nodes recorded during that traversal must
/// report monotonicity, otherwise the result is false.
/// Sort columns that are not found in the index, or whose node is not an INPUT, are skipped and do not
/// make the result false.
bool ActionsDAG::isSortingPreserved(const SortDescription & sort_description) const
|
||||
{
|
||||
// traverse the node tree and check if there is any non-monotonic function
/// Returns true only if `column` was reached from at least one other node and none of the
/// function checks below failed.
|
||||
auto node_preserve_sorting = [&](const Node * column) -> bool
|
||||
{
|
||||
/// Default-constructed Field, passed as both range bounds to getMonotonicityForRange
/// (presumably meaning "unbounded range" -- TODO confirm).
const Field field{};
|
||||
/// NOTE(review): this set is shared by all start nodes of the loop below and is never reset,
/// so a child reached from one start node is not pushed again from a later one -- confirm this is intended.
std::unordered_set<const Node *> visited_nodes;
|
||||

||||
bool column_found = false;
|
||||
for (const auto & head : nodes)
|
||||
{
|
||||
const auto * root = &head;
|
||||
/// Do not start a traversal from the column itself.
if (root == column)
|
||||
continue;
|
||||

||||
/// Depth-first traversal over `children`, starting at `root` (root is pushed without being marked visited).
std::stack<const Node *> dfs;
|
||||
dfs.push(root);
|
||||

||||
/// NOTE(review): receives every node popped from `dfs` for this root, in pop order;
/// it is not limited to the nodes on the path from root to `column`.
std::stack<const Node *> backtrace;
|
||||

||||
while (!dfs.empty())
|
||||
{
|
||||
const auto * node = dfs.top();
|
||||
dfs.pop();
|
||||
backtrace.push(node);
|
||||

||||
/// if found column
|
||||
if (node == column)
|
||||
{
|
||||
column_found = true;
|
||||

||||
backtrace.pop(); /// pop column itself
|
||||

||||
/// walk back to root and check functions
/// (this drains `backtrace` completely; the traversal of this root continues afterwards)
|
||||
while (!backtrace.empty())
|
||||
{
|
||||
const auto * current = backtrace.top();
|
||||
backtrace.pop();
|
||||

||||
if (current->type == ActionType::FUNCTION)
|
||||
{
|
||||
auto func = current->function_base;
|
||||
/// FUNCTION nodes without a function_base are skipped, not treated as a failure.
if (func)
|
||||
{
|
||||
if (!func->hasInformationAboutMonotonicity())
|
||||
return false;
|
||||

||||
const auto & types = func->getArgumentTypes();
|
||||
/// A function without arguments is treated as not preserving the order.
if (types.empty())
|
||||
return false;
|
||||

||||
/// NOTE(review): only the first argument type is used, whichever argument `column`
/// actually feeds -- confirm for multi-argument functions.
const auto monotonicity = func->getMonotonicityForRange(*types.front(), field, field);
|
||||
if (!monotonicity.is_always_monotonic)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||

||||
/// Schedule children that have not been scheduled before (also executed for `column` itself).
for (const auto * child : node->children)
|
||||
{
|
||||
if (!visited_nodes.contains(child))
|
||||
{
|
||||
dfs.push(child);
|
||||
visited_nodes.insert(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||

||||
/// False when no traversal ever reached `column`.
return column_found;
|
||||
};
|
||||

||||
for (const auto & column_sort_desc : sort_description)
|
||||
{
|
||||
const auto * node = tryFindInIndex(column_sort_desc.column_name);
|
||||
/// Only sort columns that map to an INPUT node are checked; all others are skipped.
if (node && node->type == ActionsDAG::ActionType::INPUT)
|
||||
{
|
||||
if (!node_preserve_sorting(node))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -33,6 +33,8 @@ namespace JSONBuilder
|
||||
using ItemPtr = std::unique_ptr<IItem>;
|
||||
}
|
||||
|
||||
class SortDescription;
|
||||
|
||||
/// Directed acyclic graph of expressions.
|
||||
/// This is an intermediate representation of actions which is usually built from expression list AST.
|
||||
/// Node of DAG describe calculation of a single column with known type, name, and constant value (if applicable).
|
||||
@ -73,7 +75,7 @@ public:
|
||||
DataTypePtr result_type;
|
||||
|
||||
FunctionOverloadResolverPtr function_builder;
|
||||
/// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity.
|
||||
/// Can be used to get function signature or properties like monotonicity.
|
||||
FunctionBasePtr function_base;
|
||||
/// Prepared function which is used in function execution.
|
||||
ExecutableFunctionPtr function;
|
||||
@ -301,6 +303,8 @@ public:
|
||||
const Names & available_inputs,
|
||||
const ColumnsWithTypeAndName & all_inputs);
|
||||
|
||||
bool isSortingPreserved(const SortDescription & sort_description) const;
|
||||
|
||||
private:
|
||||
Node & addNode(Node node);
|
||||
|
||||
|
@ -10,34 +10,18 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static bool isSortingPreserved(const SortDescription & sort_description, const ActionsDAGPtr & actions_dag)
|
||||
static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions, const SortDescription & input_sort_desc)
|
||||
{
|
||||
for (const auto & column_sort_desc : sort_description)
|
||||
{
|
||||
const auto * node = actions_dag->tryFindInIndex(column_sort_desc.column_name);
|
||||
if (node && node->type == ActionsDAG::ActionType::ALIAS)
|
||||
{
|
||||
// todo: check if alias keep order
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions, const SortDescription& input_sort_desc)
|
||||
{
|
||||
return ITransformingStep::Traits
|
||||
{
|
||||
return ITransformingStep::Traits{
|
||||
{
|
||||
.preserves_distinct_columns = !actions->hasArrayJoin(),
|
||||
.returns_single_stream = false,
|
||||
.preserves_number_of_streams = true,
|
||||
.preserves_sorting = isSortingPreserved(input_sort_desc, actions)
|
||||
.preserves_sorting = !actions->hasArrayJoin() && actions->isSortingPreserved(input_sort_desc),
|
||||
},
|
||||
{
|
||||
.preserves_number_of_rows = !actions->hasArrayJoin(),
|
||||
}
|
||||
};
|
||||
}};
|
||||
}
|
||||
|
||||
ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDAGPtr & actions_dag_)
|
||||
|
@ -325,8 +325,19 @@ static void explainStep(
|
||||
elem.dumpNameAndType(settings.out);
|
||||
}
|
||||
}
|
||||
|
||||
settings.out.write('\n');
|
||||
|
||||
if (step.hasOutputStream() && step.getOutputStream().header)
|
||||
{
|
||||
settings.out << prefix << "Sort Mode: " << step.getOutputStream().sort_mode;
|
||||
if (step.getOutputStream().sort_mode != DataStream::SortMode::None)
|
||||
{
|
||||
settings.out << " ( ";
|
||||
dumpSortDescription(step.getOutputStream().sort_description, settings.out);
|
||||
settings.out << " )";
|
||||
}
|
||||
settings.out.write('\n');
|
||||
}
|
||||
}
|
||||
|
||||
if (options.actions)
|
||||
|
@ -196,7 +196,7 @@ void SortingStep::mergeSorting(QueryPipelineBuilder & pipeline, const SortDescri
|
||||
|
||||
void SortingStep::fullSort(QueryPipelineBuilder & pipeline, const SortDescription & result_sort_desc, const UInt64 limit_, const bool skip_partial_sort)
|
||||
{
|
||||
if (!skip_partial_sort)
|
||||
if (!skip_partial_sort || limit_)
|
||||
{
|
||||
pipeline.addSimpleTransform(
|
||||
[&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr
|
||||
@ -287,7 +287,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
|
||||
if (input_sort_desc.hasPrefix(result_description))
|
||||
{
|
||||
LOG_DEBUG(getLogger(), "Almost FullSort");
|
||||
fullSort(pipeline, result_description, limit, true);
|
||||
const bool skip_partial_sort = true;
|
||||
fullSort(pipeline, result_description, limit, skip_partial_sort);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -65,7 +65,7 @@ private:
|
||||
QueryPipelineBuilder & pipeline,
|
||||
const SortDescription & result_sort_desc,
|
||||
UInt64 limit_,
|
||||
bool skip_partial_sort = false);
|
||||
bool skip_partial_sort = false); /// if chunks already sorted according to result_sort_desc, then skip chunk sorting
|
||||
|
||||
enum class Type
|
||||
{
|
||||
|
@ -1,4 +1,73 @@
|
||||
-- { echoOn }
|
||||
SELECT a from optimize_sorting order by a;
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
-- queries with unary function in order by
|
||||
SELECT a from optimize_sorting order by -a;
|
||||
9
|
||||
8
|
||||
7
|
||||
6
|
||||
5
|
||||
4
|
||||
3
|
||||
2
|
||||
1
|
||||
0
|
||||
SELECT a from optimize_sorting order by toFloat64(a);
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
-- queries with non-unary function in order by
|
||||
SELECT a from optimize_sorting order by a+1;
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
SELECT a from optimize_sorting order by a-1;
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
SELECT a from optimize_sorting order by sipHash64(a, 'a');
|
||||
3
|
||||
8
|
||||
0
|
||||
9
|
||||
5
|
||||
1
|
||||
2
|
||||
6
|
||||
4
|
||||
7
|
||||
-- queries with aliases
|
||||
SELECT a as a from optimize_sorting order by a;
|
||||
0
|
||||
1
|
||||
@ -10,6 +79,28 @@ SELECT a as a from optimize_sorting order by a;
|
||||
7
|
||||
8
|
||||
9
|
||||
SELECT a+1 as a from optimize_sorting order by a;
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
SELECT toFloat64(a) as a from optimize_sorting order by a;
|
||||
0
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
SELECT sipHash64(a) as a from optimize_sorting order by a;
|
||||
9140302661501632497
|
||||
9199082625845137542
|
||||
|
@ -1,8 +1,24 @@
|
||||
set optimize_sorting_for_input_stream=1;
|
||||
DROP TABLE IF EXISTS optimize_sorting;
|
||||
CREATE TABLE optimize_sorting (a UInt64, b UInt64, c UInt64) ENGINE MergeTree() ORDER BY (a, b);
|
||||
INSERT INTO optimize_sorting SELECT number, number%5, number%2 from numbers(10);
|
||||
INSERT INTO optimize_sorting SELECT number, number%5, number%2 from numbers(0, 5);
|
||||
INSERT INTO optimize_sorting SELECT number, number%5, number%2 from numbers(5, 5);
|
||||
-- { echoOn }
|
||||
SELECT a from optimize_sorting order by a;
|
||||
-- queries with unary function in order by
|
||||
SELECT a from optimize_sorting order by -a;
|
||||
SELECT a from optimize_sorting order by toFloat64(a);
|
||||
-- queries with non-unary function in order by
|
||||
SELECT a from optimize_sorting order by a+1;
|
||||
SELECT a from optimize_sorting order by a-1;
|
||||
SELECT a from optimize_sorting order by sipHash64(a, 'a');
|
||||
-- queries with aliases
|
||||
SELECT a as a from optimize_sorting order by a;
|
||||
SELECT a+1 as a from optimize_sorting order by a;
|
||||
SELECT toFloat64(a) as a from optimize_sorting order by a;
|
||||
SELECT sipHash64(a) as a from optimize_sorting order by a;
|
||||
-- queries with filter+expression
|
||||
-- SELECT a FROM (SELECT a FROM optimize_sorting) WHERE a != 0 ORDER BY a;
|
||||
-- SELECT a FROM (SELECT sipHash64(a) AS a FROM optimize_sorting) WHERE a != 0 ORDER BY a;
|
||||
-- { echoOff }
|
||||
DROP TABLE IF EXISTS optimize_sorting;
|
||||
-- DROP TABLE IF EXISTS optimize_sorting;
|
||||
|
Loading…
Reference in New Issue
Block a user