Optimize quantilesExact{Low,High} to use nth_element instead of sort

This commit is contained in:
Danila Kutenin 2022-02-03 12:24:33 +00:00
parent f4423937bd
commit c90b1f7794
2 changed files with 31 additions and 26 deletions

View File

@ -262,9 +262,7 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
{ {
if (!array.empty()) if (!array.empty())
{ {
// sort inputs in ascending order size_t n = 0;
::sort(array.begin(), array.end());
// if level is 0.5 then compute the "low" median of the sorted array // if level is 0.5 then compute the "low" median of the sorted array
// by the method of rounding. // by the method of rounding.
if (level == 0.5) if (level == 0.5)
@ -272,11 +270,11 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
auto s = array.size(); auto s = array.size();
if (s % 2 == 1) if (s % 2 == 1)
{ {
return array[static_cast<size_t>(floor(s / 2))]; n = static_cast<size_t>(floor(s / 2));
} }
else else
{ {
return array[static_cast<size_t>((floor(s / 2)) - 1)]; n = static_cast<size_t>((floor(s / 2)) - 1);
} }
} }
else else
@ -284,9 +282,10 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
// else quantile is the nth index of the sorted array obtained by multiplying // else quantile is the nth index of the sorted array obtained by multiplying
// level and size of array. Example if level = 0.1 and size of array is 10, // level and size of array. Example if level = 0.1 and size of array is 10,
// then return array[1]. // then return array[1].
size_t n = level < 1 ? level * array.size() : (array.size() - 1); n = level < 1 ? level * array.size() : (array.size() - 1);
return array[n];
} }
::nth_element(array.begin(), array.begin() + n, array.end());
return array[n];
} }
return std::numeric_limits<Value>::quiet_NaN(); return std::numeric_limits<Value>::quiet_NaN();
} }
@ -295,12 +294,11 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
{ {
if (!array.empty()) if (!array.empty())
{ {
// sort inputs in ascending order size_t prev_n = 0;
::sort(array.begin(), array.end());
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
auto level = levels[indices[i]]; auto level = levels[indices[i]];
size_t n = 0;
// if level is 0.5 then compute the "low" median of the sorted array // if level is 0.5 then compute the "low" median of the sorted array
// by the method of rounding. // by the method of rounding.
if (level == 0.5) if (level == 0.5)
@ -308,20 +306,22 @@ struct QuantileExactLow : public QuantileExactBase<Value, QuantileExactLow<Value
auto s = array.size(); auto s = array.size();
if (s % 2 == 1) if (s % 2 == 1)
{ {
result[indices[i]] = array[static_cast<size_t>(floor(s / 2))]; n = static_cast<size_t>(floor(s / 2));
} }
else else
{ {
result[indices[i]] = array[static_cast<size_t>(floor((s / 2) - 1))]; n = static_cast<size_t>(floor((s / 2) - 1));
} }
} }
else else
{ {
// else quantile is the nth index of the sorted array obtained by multiplying // else quantile is the nth index of the sorted array obtained by multiplying
// level and size of array. Example if level = 0.1 and size of array is 10. // level and size of array. Example if level = 0.1 and size of array is 10.
size_t n = level < 1 ? level * array.size() : (array.size() - 1); n = level < 1 ? level * array.size() : (array.size() - 1);
result[indices[i]] = array[n];
} }
::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
result[indices[i]] = array[n];
prev_n = n;
} }
} }
else else
@ -344,23 +344,22 @@ struct QuantileExactHigh : public QuantileExactBase<Value, QuantileExactHigh<Val
{ {
if (!array.empty()) if (!array.empty())
{ {
// sort inputs in ascending order size_t n = 0;
::sort(array.begin(), array.end());
// if level is 0.5 then compute the "high" median of the sorted array // if level is 0.5 then compute the "high" median of the sorted array
// by the method of rounding. // by the method of rounding.
if (level == 0.5) if (level == 0.5)
{ {
auto s = array.size(); auto s = array.size();
return array[static_cast<size_t>(floor(s / 2))]; n = static_cast<size_t>(floor(s / 2));
} }
else else
{ {
// else quantile is the nth index of the sorted array obtained by multiplying // else quantile is the nth index of the sorted array obtained by multiplying
// level and size of array. Example if level = 0.1 and size of array is 10. // level and size of array. Example if level = 0.1 and size of array is 10.
size_t n = level < 1 ? level * array.size() : (array.size() - 1); n = level < 1 ? level * array.size() : (array.size() - 1);
return array[n];
} }
::nth_element(array.begin(), array.begin() + n, array.end());
return array[n];
} }
return std::numeric_limits<Value>::quiet_NaN(); return std::numeric_limits<Value>::quiet_NaN();
} }
@ -369,26 +368,27 @@ struct QuantileExactHigh : public QuantileExactBase<Value, QuantileExactHigh<Val
{ {
if (!array.empty()) if (!array.empty())
{ {
// sort inputs in ascending order size_t prev_n = 0;
::sort(array.begin(), array.end());
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
{ {
auto level = levels[indices[i]]; auto level = levels[indices[i]];
size_t n = 0;
// if level is 0.5 then compute the "high" median of the sorted array // if level is 0.5 then compute the "high" median of the sorted array
// by the method of rounding. // by the method of rounding.
if (level == 0.5) if (level == 0.5)
{ {
auto s = array.size(); auto s = array.size();
result[indices[i]] = array[static_cast<size_t>(floor(s / 2))]; n = static_cast<size_t>(floor(s / 2));
} }
else else
{ {
// else quantile is the nth index of the sorted array obtained by multiplying // else quantile is the nth index of the sorted array obtained by multiplying
// level and size of array. Example if level = 0.1 and size of array is 10. // level and size of array. Example if level = 0.1 and size of array is 10.
size_t n = level < 1 ? level * array.size() : (array.size() - 1); n = level < 1 ? level * array.size() : (array.size() - 1);
result[indices[i]] = array[n];
} }
::nth_element(array.begin() + prev_n, array.begin() + n, array.end());
result[indices[i]] = array[n];
prev_n = n;
} }
} }
else else

View File

@ -28,6 +28,11 @@
<query>SELECT quantile(d64), quantileExact(d64), quantileExactWeighted(d64, 2) FROM (SELECT * FROM t LIMIT 1000000)</query> <query>SELECT quantile(d64), quantileExact(d64), quantileExactWeighted(d64, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>
<query>SELECT quantile(d128), quantileExact(d128), quantileExactWeighted(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query> <query>SELECT quantile(d128), quantileExact(d128), quantileExactWeighted(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>
<query>SELECT quantilesExactLow(0.5)(d32) FROM (SELECT * FROM t LIMIT 10000000)</query>
<query>SELECT quantilesExactHigh(0.5)(d32) FROM (SELECT * FROM t LIMIT 10000000)</query>
<query>SELECT quantilesExactLow(0.1, 0.5, 0.9)(d32) FROM (SELECT * FROM t LIMIT 10000000)</query>
<query>SELECT quantilesExactHigh(0.1, 0.5, 0.9)(d32) FROM (SELECT * FROM t LIMIT 10000000)</query>
<query>SELECT quantilesExact(0.1, 0.9)(d32), quantilesExactWeighted(0.1, 0.9)(d32, 2) FROM (SELECT * FROM t LIMIT 10000000)</query> <query>SELECT quantilesExact(0.1, 0.9)(d32), quantilesExactWeighted(0.1, 0.9)(d32, 2) FROM (SELECT * FROM t LIMIT 10000000)</query>
<query>SELECT quantilesExact(0.1, 0.9)(d64), quantilesExactWeighted(0.1, 0.9)(d64, 2) FROM (SELECT * FROM t LIMIT 1000000)</query> <query>SELECT quantilesExact(0.1, 0.9)(d64), quantilesExactWeighted(0.1, 0.9)(d64, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>
<query>SELECT quantilesExact(0.1, 0.9)(d128), quantilesExactWeighted(0.1, 0.9)(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query> <query>SELECT quantilesExact(0.1, 0.9)(d128), quantilesExactWeighted(0.1, 0.9)(d128, 2) FROM (SELECT * FROM t LIMIT 1000000)</query>