From c90b1f779493683ce47ebd2c5d08548d3278a147 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 3 Feb 2022 12:24:33 +0000 Subject: [PATCH] Optimize quantilesExact{Low,High} to use nth_element instead of sort --- src/AggregateFunctions/QuantileExact.h | 52 ++++++++++++------------ tests/performance/decimal_aggregates.xml | 5 +++ 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/AggregateFunctions/QuantileExact.h b/src/AggregateFunctions/QuantileExact.h index c76903f5081..9be24689d12 100644 --- a/src/AggregateFunctions/QuantileExact.h +++ b/src/AggregateFunctions/QuantileExact.h @@ -262,9 +262,7 @@ struct QuantileExactLow : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { - return array[static_cast((floor(s / 2)) - 1)]; + n = static_cast((floor(s / 2)) - 1); } } else @@ -284,9 +282,10 @@ struct QuantileExactLow : public QuantileExactBase::quiet_NaN(); } @@ -295,12 +294,11 @@ struct QuantileExactLow : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { - result[indices[i]] = array[static_cast(floor((s / 2) - 1))]; + n = static_cast(floor((s / 2) - 1)); } } else { // else quantile is the nth index of the sorted array obtained by multiplying // level and size of array. Example if level = 0.1 and size of array is 10. - size_t n = level < 1 ? level * array.size() : (array.size() - 1); - result[indices[i]] = array[n]; + n = level < 1 ? level * array.size() : (array.size() - 1); } + ::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + result[indices[i]] = array[n]; + prev_n = n; } } else @@ -344,23 +344,22 @@ struct QuantileExactHigh : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { // else quantile is the nth index of the sorted array obtained by multiplying // level and size of array. Example if level = 0.1 and size of array is 10. - size_t n = level < 1 ? level * array.size() : (array.size() - 1); - return array[n]; + n = level < 1 ? level * array.size() : (array.size() - 1); } + ::nth_element(array.begin(), array.begin() + n, array.end()); + return array[n]; } return std::numeric_limits::quiet_NaN(); } @@ -369,26 +368,27 @@ struct QuantileExactHigh : public QuantileExactBase(floor(s / 2))]; + n = static_cast(floor(s / 2)); } else { // else quantile is the nth index of the sorted array obtained by multiplying // level and size of array. Example if level = 0.1 and size of array is 10. - size_t n = level < 1 ? level * array.size() : (array.size() - 1); - result[indices[i]] = array[n]; + n = level < 1 ? level * array.size() : (array.size() - 1); } + ::nth_element(array.begin() + prev_n, array.begin() + n, array.end()); + result[indices[i]] = array[n]; + prev_n = n; } } else diff --git a/tests/performance/decimal_aggregates.xml b/tests/performance/decimal_aggregates.xml index 7078cb16002..ec88be0124f 100644 --- a/tests/performance/decimal_aggregates.xml +++ b/tests/performance/decimal_aggregates.xml @@ -28,6 +28,11 @@ SELECT quantile(d64), quantileExact(d64), quantileExactWeighted(d64, 2) FROM (SELECT * FROM t LIMIT 1000000) SELECT quantile(d128), quantileExact(d128), quantileExactWeighted(d128, 2) FROM (SELECT * FROM t LIMIT 1000000) + SELECT quantilesExactLow(0.5)(d32) FROM (SELECT * FROM t LIMIT 10000000) + SELECT quantilesExactHigh(0.5)(d32) FROM (SELECT * FROM t LIMIT 10000000) + SELECT quantilesExactLow(0.1, 0.5, 0.9)(d32) FROM (SELECT * FROM t LIMIT 10000000) + SELECT quantilesExactHigh(0.1, 0.5, 0.9)(d32) FROM (SELECT * FROM t LIMIT 10000000) + SELECT quantilesExact(0.1, 0.9)(d32), quantilesExactWeighted(0.1, 0.9)(d32, 2) FROM (SELECT * FROM t LIMIT 10000000) SELECT quantilesExact(0.1, 0.9)(d64), quantilesExactWeighted(0.1, 0.9)(d64, 2) FROM (SELECT * FROM t LIMIT 1000000) SELECT quantilesExact(0.1, 0.9)(d128), quantilesExactWeighted(0.1, 0.9)(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)