mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #41701 from CurtizJ/fix-monotonic-order-by
Fix `ORDER BY` monotonic functions
This commit is contained in:
commit
9916e35b3f
@ -1766,12 +1766,12 @@ public:
|
||||
return {true, is_constant_positive, true};
|
||||
}
|
||||
}
|
||||
return {false, true, false};
|
||||
return {false, true, false, false};
|
||||
}
|
||||
|
||||
// For simplicity, we treat every single value interval as positive monotonic.
|
||||
if (applyVisitor(FieldVisitorAccurateEquals(), left_point, right_point))
|
||||
return {true, true, false};
|
||||
return {true, true, false, false};
|
||||
|
||||
if (name_view == "minus" || name_view == "plus")
|
||||
{
|
||||
@ -1797,18 +1797,18 @@ public:
|
||||
// Check if there is an overflow
|
||||
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
|
||||
== applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
|
||||
return {true, true, false};
|
||||
return {true, true, false, true};
|
||||
else
|
||||
return {false, true, false};
|
||||
return {false, true, false, false};
|
||||
}
|
||||
else
|
||||
{
|
||||
// Check if there is an overflow
|
||||
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
|
||||
!= applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
|
||||
return {true, false, false};
|
||||
return {true, false, false, true};
|
||||
else
|
||||
return {false, false, false};
|
||||
return {false, false, false, false};
|
||||
}
|
||||
}
|
||||
// variable +|- constant
|
||||
@ -1829,31 +1829,33 @@ public:
|
||||
// Check if there is an overflow
|
||||
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
|
||||
== applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
|
||||
return {true, true, false};
|
||||
return {true, true, false, true};
|
||||
else
|
||||
return {false, true, false};
|
||||
return {false, true, false, false};
|
||||
}
|
||||
}
|
||||
if (name_view == "divide" || name_view == "intDiv")
|
||||
{
|
||||
bool is_strict = name_view == "divide";
|
||||
|
||||
// const / variable
|
||||
if (left.column && isColumnConst(*left.column))
|
||||
{
|
||||
auto constant = (*left.column)[0];
|
||||
if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
|
||||
return {true, true, false}; // 0 / 0 is undefined, thus it's not always monotonic
|
||||
return {true, true, false, false}; // 0 / 0 is undefined, thus it's not always monotonic
|
||||
|
||||
bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
|
||||
if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0))
|
||||
&& applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0)))
|
||||
{
|
||||
return {true, is_constant_positive, false};
|
||||
return {true, is_constant_positive, false, is_strict};
|
||||
}
|
||||
else if (
|
||||
applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point)
|
||||
&& applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point))
|
||||
{
|
||||
return {true, !is_constant_positive, false};
|
||||
return {true, !is_constant_positive, false, is_strict};
|
||||
}
|
||||
}
|
||||
// variable / constant
|
||||
@ -1861,11 +1863,11 @@ public:
|
||||
{
|
||||
auto constant = (*right.column)[0];
|
||||
if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
|
||||
return {false, true, false}; // variable / 0 is undefined, let's treat it as non-monotonic
|
||||
return {false, true, false, false}; // variable / 0 is undefined, let's treat it as non-monotonic
|
||||
|
||||
bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
|
||||
// division is saturated to `inf`, thus it doesn't have overflow issues.
|
||||
return {true, is_constant_positive, true};
|
||||
return {true, is_constant_positive, true, is_strict};
|
||||
}
|
||||
}
|
||||
return {false, true, false};
|
||||
|
@ -265,9 +265,10 @@ public:
|
||||
/// The property of monotonicity for a certain range.
|
||||
struct Monotonicity
|
||||
{
|
||||
bool is_monotonic = false; /// Is the function monotonous (non-decreasing or non-increasing).
|
||||
bool is_positive = true; /// true if the function is non-decreasing, false if non-increasing. If is_monotonic = false, then it does not matter.
|
||||
bool is_monotonic = false; /// Is the function monotonous (non-decreasing or non-increasing).
|
||||
bool is_positive = true; /// true if the function is non-decreasing, false if non-increasing. If is_monotonic = false, then it does not matter.
|
||||
bool is_always_monotonic = false; /// Is true if function is monotonic on the whole input range I
|
||||
bool is_strict = false; /// true if the function is strictly decreasing or increasing.
|
||||
};
|
||||
|
||||
/** Get information about monotonicity on a range of values. Call only if hasInformationAboutMonotonicity.
|
||||
|
@ -46,7 +46,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameAbs>
|
||||
if ((left_float < 0 && right_float > 0) || (left_float > 0 && right_float < 0))
|
||||
return {};
|
||||
|
||||
return { .is_monotonic = true, .is_positive = left_float > 0 };
|
||||
return { .is_monotonic = true, .is_positive = left_float > 0, .is_strict = true, };
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -139,7 +139,7 @@ namespace DB
|
||||
|
||||
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
|
||||
{
|
||||
return { .is_monotonic = true, .is_always_monotonic = true };
|
||||
return { .is_monotonic = true, .is_always_monotonic = true, .is_strict = true, };
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -55,7 +55,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameIntExp10>
|
||||
if (left_float < 0 || right_float > 19)
|
||||
return {};
|
||||
|
||||
return { .is_monotonic = true };
|
||||
return { .is_monotonic = true, .is_strict = true };
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -58,7 +58,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameIntExp2>
|
||||
if (left_float < 0 || right_float > 63)
|
||||
return {};
|
||||
|
||||
return { .is_monotonic = true };
|
||||
return { .is_monotonic = true, .is_strict = true, };
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -42,7 +42,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameNegate>
|
||||
static bool has() { return true; }
|
||||
static IFunction::Monotonicity get(const Field &, const Field &)
|
||||
{
|
||||
return { .is_monotonic = true, .is_positive = false };
|
||||
return { .is_monotonic = true, .is_positive = false, .is_strict = true };
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -157,7 +157,7 @@ namespace DB
|
||||
|
||||
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
|
||||
{
|
||||
return { .is_monotonic = true, .is_always_monotonic = true };
|
||||
return { .is_monotonic = true, .is_always_monotonic = true, .is_strict = true };
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -67,7 +67,7 @@ public:
|
||||
Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const override
|
||||
{
|
||||
const bool b = is_constant_timezone;
|
||||
return { .is_monotonic = b, .is_positive = b, .is_always_monotonic = b };
|
||||
return { .is_monotonic = b, .is_positive = b, .is_always_monotonic = b, .is_strict = b };
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -103,10 +103,17 @@ NameSet getFixedSortingColumns(
|
||||
return fixed_points;
|
||||
}
|
||||
|
||||
/// Outcome of matching one sort description element against one sorting key column,
/// as returned by matchSortDescriptionAndKey. A default-constructed value means "no match".
struct MatchResult
|
||||
{
|
||||
/// Direction of the match: one of {-1, 0, 1}. 0 means the element doesn't match the key column.
|
||||
int direction = 0;
|
||||
/// If true, the current key must be the last one in the matched prefix of the sort description
/// (the caller stops matching further keys after it).
|
||||
bool is_last_key = false;
|
||||
};
|
||||
|
||||
/// Optimize in case of exact match with order key element
|
||||
/// or in some simple cases when order key element is wrapped into monotonic function.
|
||||
/// Returns one of {-1, 0, 1} - direction of the match. 0 means - doesn't match.
|
||||
int matchSortDescriptionAndKey(
|
||||
MatchResult matchSortDescriptionAndKey(
|
||||
const ExpressionActions::Actions & actions,
|
||||
const SortColumnDescription & sort_column,
|
||||
const String & sorting_key_column)
|
||||
@ -114,12 +121,13 @@ int matchSortDescriptionAndKey(
|
||||
/// If required order depend on collation, it cannot be matched with primary key order.
|
||||
/// Because primary keys cannot have collations.
|
||||
if (sort_column.collator)
|
||||
return 0;
|
||||
return {};
|
||||
|
||||
MatchResult result{sort_column.direction, false};
|
||||
|
||||
int current_direction = sort_column.direction;
|
||||
/// For the path: order by (sort_column, ...)
|
||||
if (sort_column.column_name == sorting_key_column)
|
||||
return current_direction;
|
||||
return result;
|
||||
|
||||
/// For the path: order by (function(sort_column), ...)
|
||||
/// Allow only one simple monotonic function with one argument
|
||||
@ -132,44 +140,35 @@ int matchSortDescriptionAndKey(
|
||||
continue;
|
||||
|
||||
if (found_function)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
found_function = true;
|
||||
}
|
||||
return {};
|
||||
|
||||
found_function = true;
|
||||
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_column)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
return {};
|
||||
|
||||
const auto & func = *action.node->function_base;
|
||||
if (!func.hasInformationAboutMonotonicity())
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
return {};
|
||||
|
||||
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
|
||||
if (!monotonicity.is_monotonic)
|
||||
{
|
||||
current_direction = 0;
|
||||
break;
|
||||
}
|
||||
else if (!monotonicity.is_positive)
|
||||
{
|
||||
current_direction *= -1;
|
||||
}
|
||||
return {};
|
||||
|
||||
/// If the function is not strictly monotonic, it can break the order
|
||||
/// if it's not last in the prefix of sort description.
|
||||
/// E.g. if we have ORDER BY (d, u) -- ('2020-01-01', 1), ('2020-01-02', 0), ('2020-01-03', 1)
|
||||
/// ORDER BY (toStartOfMonth(d), u) -- ('2020-01-01', 1), ('2020-01-01', 0), ('2020-01-01', 1)
|
||||
if (!monotonicity.is_strict)
|
||||
result.is_last_key = true;
|
||||
|
||||
if (!monotonicity.is_positive)
|
||||
result.direction *= -1;
|
||||
}
|
||||
|
||||
if (!found_function)
|
||||
current_direction = 0;
|
||||
return {};
|
||||
|
||||
return current_direction;
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@ -218,8 +217,8 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
|
||||
if (forbidden_columns.contains(description[desc_pos].column_name))
|
||||
break;
|
||||
|
||||
int current_direction = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
|
||||
bool is_matched = current_direction && (desc_pos == 0 || current_direction == read_direction);
|
||||
auto match = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
|
||||
bool is_matched = match.direction && (desc_pos == 0 || match.direction == read_direction);
|
||||
|
||||
if (!is_matched)
|
||||
{
|
||||
@ -235,12 +234,15 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
|
||||
}
|
||||
|
||||
if (desc_pos == 0)
|
||||
read_direction = current_direction;
|
||||
read_direction = match.direction;
|
||||
|
||||
sort_description_for_merging.push_back(description[desc_pos]);
|
||||
|
||||
++desc_pos;
|
||||
++key_pos;
|
||||
|
||||
if (match.is_last_key)
|
||||
break;
|
||||
}
|
||||
|
||||
if (sort_description_for_merging.empty())
|
||||
|
@ -8,11 +8,13 @@ ExpressionTransform
|
||||
(Limit)
|
||||
Limit
|
||||
(Sorting)
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(ReadFromMergeTree)
|
||||
MergeTreeInOrder × 2 0 → 1
|
||||
FinishSortingTransform
|
||||
PartialSortingTransform
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(ReadFromMergeTree)
|
||||
MergeTreeInOrder × 2 0 → 1
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
@ -23,14 +25,16 @@ ExpressionTransform
|
||||
(Limit)
|
||||
Limit
|
||||
(Sorting)
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(ReadFromMergeTree)
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
FinishSortingTransform
|
||||
PartialSortingTransform
|
||||
MergingSortedTransform 2 → 1
|
||||
(Expression)
|
||||
ExpressionTransform × 2
|
||||
(ReadFromMergeTree)
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
ReverseTransform
|
||||
MergeTreeReverse 0 → 1
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
2020-10-01 9
|
||||
|
22
tests/queries/0_stateless/02451_order_by_monotonic.reference
Normal file
22
tests/queries/0_stateless/02451_order_by_monotonic.reference
Normal file
@ -0,0 +1,22 @@
|
||||
4
|
||||
2022-09-09 12:00:00 0
|
||||
2022-09-09 12:00:00 0x
|
||||
2022-09-09 12:00:00 0x
|
||||
2022-09-09 12:00:00 1
|
||||
2022-09-09 12:00:00 1x
|
||||
Prefix sort description: toStartOfMinute(t) ASC
|
||||
Result sort description: toStartOfMinute(t) ASC, c1 ASC
|
||||
Prefix sort description: toStartOfMinute(t) ASC
|
||||
Result sort description: toStartOfMinute(t) ASC
|
||||
Prefix sort description: negate(a) ASC
|
||||
Result sort description: negate(a) ASC
|
||||
Prefix sort description: negate(a) ASC, negate(b) ASC
|
||||
Result sort description: negate(a) ASC, negate(b) ASC
|
||||
Prefix sort description: a DESC, negate(b) ASC
|
||||
Result sort description: a DESC, negate(b) ASC
|
||||
Prefix sort description: negate(a) ASC, b DESC
|
||||
Result sort description: negate(a) ASC, b DESC
|
||||
Prefix sort description: negate(a) ASC
|
||||
Result sort description: negate(a) ASC, b ASC
|
||||
Prefix sort description: a ASC
|
||||
Result sort description: a ASC, negate(b) ASC
|
40
tests/queries/0_stateless/02451_order_by_monotonic.sh
Executable file
40
tests/queries/0_stateless/02451_order_by_monotonic.sh
Executable file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Runs "EXPLAIN PLAN actions = 1" for the query passed as $1 with optimize_read_in_order=1
# and prints only the "Prefix sort description:" and "Result sort description:" lines of the plan.
function explain_sort_description()
|
||||
{
|
||||
# Capture the whole plan once so that both greps below inspect the same output.
out=$($CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1")
|
||||
echo "$out" | grep "Prefix sort description:"
|
||||
echo "$out" | grep "Result sort description:"
|
||||
}
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1)
|
||||
AS SELECT '2022-09-09 12:00:00', toString(number % 2) FROM numbers(2) UNION ALL
|
||||
SELECT '2022-09-09 12:00:30', toString(number % 2)|| 'x' FROM numbers(3)"
|
||||
|
||||
$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -q "SELECT count() FROM
|
||||
(SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic GROUP BY s, c1)"
|
||||
|
||||
$CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1"
|
||||
|
||||
explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1"
|
||||
explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)"
|
||||
|
||||
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a"
|
||||
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, -b"
|
||||
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a DESC, -b"
|
||||
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b DESC"
|
||||
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b"
|
||||
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a, -b"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
|
Loading…
Reference in New Issue
Block a user