Merge pull request #41701 from CurtizJ/fix-monotonic-order-by

Fix `ORDER BY` monotonic functions
This commit is contained in:
Anton Popov 2022-09-30 17:32:37 +02:00 committed by GitHub
commit 9916e35b3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 138 additions and 67 deletions

View File

@ -1766,12 +1766,12 @@ public:
return {true, is_constant_positive, true};
}
}
return {false, true, false};
return {false, true, false, false};
}
// For simplicity, we treat every single value interval as positive monotonic.
if (applyVisitor(FieldVisitorAccurateEquals(), left_point, right_point))
return {true, true, false};
return {true, true, false, false};
if (name_view == "minus" || name_view == "plus")
{
@ -1797,18 +1797,18 @@ public:
// Check if there is an overflow
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
== applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
return {true, true, false};
return {true, true, false, true};
else
return {false, true, false};
return {false, true, false, false};
}
else
{
// Check if there is an overflow
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
!= applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
return {true, false, false};
return {true, false, false, true};
else
return {false, false, false};
return {false, false, false, false};
}
}
// variable +|- constant
@ -1829,31 +1829,33 @@ public:
// Check if there is an overflow
if (applyVisitor(FieldVisitorAccurateLess(), left_point, right_point)
== applyVisitor(FieldVisitorAccurateLess(), transform(left_point), transform(right_point)))
return {true, true, false};
return {true, true, false, true};
else
return {false, true, false};
return {false, true, false, false};
}
}
if (name_view == "divide" || name_view == "intDiv")
{
bool is_strict = name_view == "divide";
// const / variable
if (left.column && isColumnConst(*left.column))
{
auto constant = (*left.column)[0];
if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
return {true, true, false}; // 0 / 0 is undefined, thus it's not always monotonic
return {true, true, false, false}; // 0 / 0 is undefined, thus it's not always monotonic
bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0))
&& applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0)))
{
return {true, is_constant_positive, false};
return {true, is_constant_positive, false, is_strict};
}
else if (
applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point)
&& applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point))
{
return {true, !is_constant_positive, false};
return {true, !is_constant_positive, false, is_strict};
}
}
// variable / constant
@ -1861,11 +1863,11 @@ public:
{
auto constant = (*right.column)[0];
if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
return {false, true, false}; // variable / 0 is undefined, let's treat it as non-monotonic
return {false, true, false, false}; // variable / 0 is undefined, let's treat it as non-monotonic
bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
// division is saturated to `inf`, thus it doesn't have overflow issues.
return {true, is_constant_positive, true};
return {true, is_constant_positive, true, is_strict};
}
}
return {false, true, false};

View File

@ -265,9 +265,10 @@ public:
/// The property of monotonicity for a certain range.
struct Monotonicity
{
bool is_monotonic = false; /// Is the function monotonous (non-decreasing or non-increasing).
bool is_positive = true; /// true if the function is non-decreasing, false if non-increasing. If is_monotonic = false, then it does not matter.
bool is_monotonic = false; /// Is the function monotonous (non-decreasing or non-increasing).
bool is_positive = true; /// true if the function is non-decreasing, false if non-increasing. If is_monotonic = false, then it does not matter.
bool is_always_monotonic = false; /// Is true if function is monotonic on the whole input range I
bool is_strict = false; /// true if the function is strictly decreasing or increasing.
};
/** Get information about monotonicity on a range of values. Call only if hasInformationAboutMonotonicity.

View File

@ -46,7 +46,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameAbs>
if ((left_float < 0 && right_float > 0) || (left_float > 0 && right_float < 0))
return {};
return { .is_monotonic = true, .is_positive = left_float > 0 };
return { .is_monotonic = true, .is_positive = left_float > 0, .is_strict = true, };
}
};

View File

@ -139,7 +139,7 @@ namespace DB
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
{
return { .is_monotonic = true, .is_always_monotonic = true };
return { .is_monotonic = true, .is_always_monotonic = true, .is_strict = true, };
}
private:

View File

@ -55,7 +55,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameIntExp10>
if (left_float < 0 || right_float > 19)
return {};
return { .is_monotonic = true };
return { .is_monotonic = true, .is_strict = true };
}
};

View File

@ -58,7 +58,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameIntExp2>
if (left_float < 0 || right_float > 63)
return {};
return { .is_monotonic = true };
return { .is_monotonic = true, .is_strict = true, };
}
};

View File

@ -42,7 +42,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameNegate>
static bool has() { return true; }
static IFunction::Monotonicity get(const Field &, const Field &)
{
return { .is_monotonic = true, .is_positive = false };
return { .is_monotonic = true, .is_positive = false, .is_strict = true };
}
};

View File

@ -157,7 +157,7 @@ namespace DB
Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
{
return { .is_monotonic = true, .is_always_monotonic = true };
return { .is_monotonic = true, .is_always_monotonic = true, .is_strict = true };
}
private:

View File

@ -67,7 +67,7 @@ public:
Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const override
{
const bool b = is_constant_timezone;
return { .is_monotonic = b, .is_positive = b, .is_always_monotonic = b };
return { .is_monotonic = b, .is_positive = b, .is_always_monotonic = b, .is_strict = b };
}
private:

View File

@ -103,10 +103,17 @@ NameSet getFixedSortingColumns(
return fixed_points;
}
/// Outcome of matching a single sort description element against a single sorting key column.
struct MatchResult
{
    /// Direction of the match, one of {-1, 0, 1}; 0 means there is no match.
    int direction{0};

    /// When set, the current key has to be the last one
    /// in the matched prefix of the sort description.
    bool is_last_key{false};
};
/// Optimize in case of exact match with order key element
/// or in some simple cases when order key element is wrapped into monotonic function.
/// Returns one of {-1, 0, 1} - direction of the match. 0 means - doesn't match.
int matchSortDescriptionAndKey(
MatchResult matchSortDescriptionAndKey(
const ExpressionActions::Actions & actions,
const SortColumnDescription & sort_column,
const String & sorting_key_column)
@ -114,12 +121,13 @@ int matchSortDescriptionAndKey(
/// If required order depend on collation, it cannot be matched with primary key order.
/// Because primary keys cannot have collations.
if (sort_column.collator)
return 0;
return {};
MatchResult result{sort_column.direction, false};
int current_direction = sort_column.direction;
/// For the path: order by (sort_column, ...)
if (sort_column.column_name == sorting_key_column)
return current_direction;
return result;
/// For the path: order by (function(sort_column), ...)
/// Allow only one simple monotonic function with one argument
@ -132,44 +140,35 @@ int matchSortDescriptionAndKey(
continue;
if (found_function)
{
current_direction = 0;
break;
}
else
{
found_function = true;
}
return {};
found_function = true;
if (action.node->children.size() != 1 || action.node->children.at(0)->result_name != sorting_key_column)
{
current_direction = 0;
break;
}
return {};
const auto & func = *action.node->function_base;
if (!func.hasInformationAboutMonotonicity())
{
current_direction = 0;
break;
}
return {};
auto monotonicity = func.getMonotonicityForRange(*func.getArgumentTypes().at(0), {}, {});
if (!monotonicity.is_monotonic)
{
current_direction = 0;
break;
}
else if (!monotonicity.is_positive)
{
current_direction *= -1;
}
return {};
/// If function is not strict monotonic, it can break order
/// if it's not last in the prefix of sort description.
/// E.g. if we have ORDER BY (d, u) -- ('2020-01-01', 1), ('2020-01-02', 0), ('2020-01-03', 1)
/// ORDER BY (toStartOfMonth(d), u) -- ('2020-01-01', 1), ('2020-01-01', 0), ('2020-01-01', 1)
if (!monotonicity.is_strict)
result.is_last_key = true;
if (!monotonicity.is_positive)
result.direction *= -1;
}
if (!found_function)
current_direction = 0;
return {};
return current_direction;
return result;
}
}
@ -218,8 +217,8 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
if (forbidden_columns.contains(description[desc_pos].column_name))
break;
int current_direction = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
bool is_matched = current_direction && (desc_pos == 0 || current_direction == read_direction);
auto match = matchSortDescriptionAndKey(actions[desc_pos]->getActions(), description[desc_pos], sorting_key_columns[key_pos]);
bool is_matched = match.direction && (desc_pos == 0 || match.direction == read_direction);
if (!is_matched)
{
@ -235,12 +234,15 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
}
if (desc_pos == 0)
read_direction = current_direction;
read_direction = match.direction;
sort_description_for_merging.push_back(description[desc_pos]);
++desc_pos;
++key_pos;
if (match.is_last_key)
break;
}
if (sort_description_for_merging.empty())

View File

@ -8,11 +8,13 @@ ExpressionTransform
(Limit)
Limit
(Sorting)
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
MergeTreeInOrder × 2 0 → 1
FinishSortingTransform
PartialSortingTransform
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
MergeTreeInOrder × 2 0 → 1
2020-10-01 9
2020-10-01 9
2020-10-01 9
@ -23,14 +25,16 @@ ExpressionTransform
(Limit)
Limit
(Sorting)
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
ReverseTransform
MergeTreeReverse 0 → 1
FinishSortingTransform
PartialSortingTransform
MergingSortedTransform 2 → 1
(Expression)
ExpressionTransform × 2
(ReadFromMergeTree)
ReverseTransform
MergeTreeReverse 0 → 1
ReverseTransform
MergeTreeReverse 0 → 1
2020-10-01 9
2020-10-01 9
2020-10-01 9

View File

@ -0,0 +1,22 @@
4
2022-09-09 12:00:00 0
2022-09-09 12:00:00 0x
2022-09-09 12:00:00 0x
2022-09-09 12:00:00 1
2022-09-09 12:00:00 1x
Prefix sort description: toStartOfMinute(t) ASC
Result sort description: toStartOfMinute(t) ASC, c1 ASC
Prefix sort description: toStartOfMinute(t) ASC
Result sort description: toStartOfMinute(t) ASC
Prefix sort description: negate(a) ASC
Result sort description: negate(a) ASC
Prefix sort description: negate(a) ASC, negate(b) ASC
Result sort description: negate(a) ASC, negate(b) ASC
Prefix sort description: a DESC, negate(b) ASC
Result sort description: a DESC, negate(b) ASC
Prefix sort description: negate(a) ASC, b DESC
Result sort description: negate(a) ASC, b DESC
Prefix sort description: negate(a) ASC
Result sort description: negate(a) ASC, b ASC
Prefix sort description: a ASC
Result sort description: a ASC, negate(b) ASC

View File

@ -0,0 +1,40 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Print the sort descriptions reported by EXPLAIN PLAN for a query.
# Arguments:
#   $1 - query text, appended after "EXPLAIN PLAN actions = 1".
# Output:
#   all lines containing "Prefix sort description:" first,
#   then all lines containing "Result sort description:".
function explain_sort_description()
{
    # Keep the plan text function-local so it does not leak into the script's
    # global scope. Declared separately from the assignment so that `local`
    # does not mask the exit status of the client invocation.
    local out
    out=$($CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "EXPLAIN PLAN actions = 1 $1")
    echo "$out" | grep "Prefix sort description:"
    echo "$out" | grep "Result sort description:"
}
# Case 1: table sorted by (t, c1); the queries wrap the first key column in toStartOfMinute().
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
# Two rows at 12:00:00 and three rows at 12:00:30, i.e. distinct values of t that fall into the same minute.
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (t DateTime, c1 String) ENGINE = MergeTree ORDER BY (t, c1)
AS SELECT '2022-09-09 12:00:00', toString(number % 2) FROM numbers(2) UNION ALL
SELECT '2022-09-09 12:00:30', toString(number % 2)|| 'x' FROM numbers(3)"
# Count the groups produced by GROUP BY (toStartOfMinute(t), c1) with aggregation in order enabled.
$CLICKHOUSE_CLIENT --optimize_aggregation_in_order=1 -q "SELECT count() FROM
(SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic GROUP BY s, c1)"
# Print the rows sorted by (toStartOfMinute(t), c1) with read-in-order enabled.
$CLICKHOUSE_CLIENT --optimize_read_in_order=1 -q "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1"
# Print the prefix/result sort descriptions for the two-column and the one-column ORDER BY.
explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s, c1"
explain_sort_description "SELECT toStartOfMinute(t) AS s, c1 FROM t_order_by_monotonic ORDER BY s"
# Case 2: table sorted by (a, b) over Int64 columns; the queries apply unary minus to key columns.
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_order_by_monotonic (a Int64, b Int64) ENGINE = MergeTree ORDER BY (a, b)"
$CLICKHOUSE_CLIENT -q "INSERT INTO t_order_by_monotonic VALUES (1, 1) (1, 2), (2, 1) (2, 2)"
# Each query combines plain/negated key columns with ASC/DESC; the sort descriptions of every plan are printed.
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, -b"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a DESC, -b"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b DESC"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY -a, b"
explain_sort_description "SELECT * FROM t_order_by_monotonic ORDER BY a, -b"
# Remove the test table.
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_order_by_monotonic"