mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #14495 from nikitamikhaylov/update-permutation-bugfix-3
updatePermutation with Nullable
This commit is contained in:
commit
65517da62b
@ -781,18 +781,21 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
|
||||
|
||||
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
if (equal_range.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_range.size();
|
||||
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto & [first, last] = equal_range[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
|
||||
@ -817,7 +820,13 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_range.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
|
||||
else
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <Core/BigInt.h>
|
||||
|
||||
#include <common/unaligned.h>
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
@ -142,25 +143,31 @@ void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn:
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= data.size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= data.size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] > data[b]; });
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] < data[b]; });
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
@ -178,13 +185,20 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] > data[b]; });
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last,
|
||||
[this](size_t a, size_t b) { return data[a] < data[b]; });
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -208,7 +222,6 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -168,24 +170,29 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t k = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--k;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < k; ++i)
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
@ -202,11 +209,18 @@ void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permu
|
||||
}
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -230,7 +244,6 @@ void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permu
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/WeakHash.h>
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -329,19 +330,24 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
|
||||
@ -366,7 +372,13 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
|
||||
@ -374,6 +386,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
|
||||
auto new_first = first;
|
||||
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0)
|
||||
@ -384,6 +397,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
new_first = j;
|
||||
}
|
||||
}
|
||||
|
||||
auto new_last = limit;
|
||||
for (auto j = limit; j < last; ++j)
|
||||
{
|
||||
@ -396,7 +410,6 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
|
@ -329,73 +329,113 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= equal_range.back().second || limit >= size())
|
||||
limit = 0;
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
EqualRanges new_ranges, temp_ranges;
|
||||
/// We will sort nested columns into `new_ranges` and call updatePermutation in next columns with `null_ranges`.
|
||||
EqualRanges new_ranges, null_ranges;
|
||||
|
||||
for (const auto &[first, last] : equal_range)
|
||||
const auto is_nulls_last = ((null_direction_hint > 0) != reverse);
|
||||
|
||||
if (is_nulls_last)
|
||||
{
|
||||
bool direction = ((null_direction_hint > 0) != reverse);
|
||||
/// Shift all NULL values to the end.
|
||||
|
||||
size_t read_idx = first;
|
||||
size_t write_idx = first;
|
||||
while (read_idx < last && (isNullAt(res[read_idx])^direction))
|
||||
for (const auto & [first, last] : equal_ranges)
|
||||
{
|
||||
++read_idx;
|
||||
++write_idx;
|
||||
}
|
||||
/// The current interval starts beyond the limit.
|
||||
if (limit && first > limit)
|
||||
break;
|
||||
|
||||
++read_idx;
|
||||
/// Consider a half-open interval [first, last)
|
||||
size_t read_idx = first;
|
||||
size_t write_idx = first;
|
||||
size_t end_idx = last;
|
||||
|
||||
/// Invariants:
|
||||
/// write_idx < read_idx
|
||||
/// write_idx points to NULL
|
||||
/// read_idx will be incremented to position of next not-NULL
|
||||
/// there is a range of NULLs between write_idx and read_idx - 1,
|
||||
/// We are moving elements from the end to the beginning of this range,
|
||||
/// so range will "bubble" towards the end.
|
||||
/// Relative order of NULL elements could be changed,
|
||||
/// but relative order of non-NULLs is preserved.
|
||||
|
||||
while (read_idx < last && write_idx < last)
|
||||
{
|
||||
if (isNullAt(res[read_idx])^direction)
|
||||
/// We can't check the limit here because the interval is not sorted by nested column.
|
||||
while (read_idx < end_idx && !isNullAt(res[read_idx]))
|
||||
{
|
||||
std::swap(res[read_idx], res[write_idx]);
|
||||
++read_idx;
|
||||
++write_idx;
|
||||
}
|
||||
++read_idx;
|
||||
}
|
||||
|
||||
if (write_idx - first > 1)
|
||||
{
|
||||
if (direction)
|
||||
temp_ranges.emplace_back(first, write_idx);
|
||||
else
|
||||
++read_idx;
|
||||
|
||||
/// Invariants:
|
||||
/// write_idx < read_idx
|
||||
/// write_idx points to NULL
|
||||
/// read_idx will be incremented to position of next not-NULL
|
||||
/// there is a range of NULLs between write_idx and read_idx - 1,
|
||||
/// We are moving elements from the end to the beginning of this range,
|
||||
/// so range will "bubble" towards the end.
|
||||
/// Relative order of NULL elements could be changed,
|
||||
/// but relative order of non-NULLs is preserved.
|
||||
|
||||
while (read_idx < end_idx && write_idx < end_idx)
|
||||
{
|
||||
if (!isNullAt(res[read_idx]))
|
||||
{
|
||||
std::swap(res[read_idx], res[write_idx]);
|
||||
++write_idx;
|
||||
}
|
||||
++read_idx;
|
||||
}
|
||||
|
||||
/// We have a range [first, write_idx) of non-NULL values
|
||||
if (first != write_idx)
|
||||
new_ranges.emplace_back(first, write_idx);
|
||||
|
||||
}
|
||||
|
||||
if (last - write_idx > 1)
|
||||
{
|
||||
if (direction)
|
||||
new_ranges.emplace_back(write_idx, last);
|
||||
else
|
||||
temp_ranges.emplace_back(write_idx, last);
|
||||
/// We have a range [write_idx, last) of NULL values
|
||||
if (write_idx != last)
|
||||
null_ranges.emplace_back(write_idx, last);
|
||||
}
|
||||
}
|
||||
while (!new_ranges.empty() && limit && limit <= new_ranges.back().first)
|
||||
new_ranges.pop_back();
|
||||
else
|
||||
{
|
||||
/// Shift all NULL values to the beginning.
|
||||
for (const auto & [first, last] : equal_ranges)
|
||||
{
|
||||
/// The current interval starts beyond the limit.
|
||||
if (limit && first > limit)
|
||||
break;
|
||||
|
||||
if (!temp_ranges.empty())
|
||||
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, temp_ranges);
|
||||
ssize_t read_idx = last - 1;
|
||||
ssize_t write_idx = last - 1;
|
||||
ssize_t begin_idx = first;
|
||||
|
||||
equal_range.resize(temp_ranges.size() + new_ranges.size());
|
||||
std::merge(temp_ranges.begin(), temp_ranges.end(), new_ranges.begin(), new_ranges.end(), equal_range.begin());
|
||||
while (read_idx >= begin_idx && !isNullAt(res[read_idx]))
|
||||
{
|
||||
--read_idx;
|
||||
--write_idx;
|
||||
}
|
||||
|
||||
--read_idx;
|
||||
|
||||
while (read_idx >= begin_idx && write_idx >= begin_idx)
|
||||
{
|
||||
if (!isNullAt(res[read_idx]))
|
||||
{
|
||||
std::swap(res[read_idx], res[write_idx]);
|
||||
--write_idx;
|
||||
}
|
||||
--read_idx;
|
||||
}
|
||||
|
||||
/// We have a range [write_idx+1, last) of non-NULL values
|
||||
if (write_idx != static_cast<ssize_t>(last))
|
||||
new_ranges.emplace_back(write_idx + 1, last);
|
||||
|
||||
/// We have a range [first, write_idx+1) of NULL values
|
||||
if (static_cast<ssize_t>(first) != write_idx)
|
||||
null_ranges.emplace_back(first, write_idx + 1);
|
||||
}
|
||||
}
|
||||
|
||||
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
|
||||
|
||||
equal_ranges = std::move(new_ranges);
|
||||
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
|
||||
}
|
||||
|
||||
void ColumnNullable::gather(ColumnGathererStream & gatherer)
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
#include <common/unaligned.h>
|
||||
|
||||
#include <ext/scope_guard.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -325,25 +325,30 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit > equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit > equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
auto less_true = less<true>(*this);
|
||||
auto less_false = less<false>(*this);
|
||||
size_t n = equal_range.size();
|
||||
if (limit)
|
||||
--n;
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--number_of_ranges;
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto &[first, last] = equal_range[i];
|
||||
const auto & [first, last] = equal_ranges[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, less_false);
|
||||
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, less_true);
|
||||
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
|
||||
|
||||
size_t new_first = first;
|
||||
for (size_t j = first + 1; j < last; ++j)
|
||||
{
|
||||
@ -363,11 +368,18 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto &[first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less_false);
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less_true);
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
|
||||
|
||||
size_t new_first = first;
|
||||
for (size_t j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -394,7 +406,6 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
|
||||
@ -534,19 +545,25 @@ void ColumnString::getPermutationWithCollation(const Collator & collator, bool r
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation &res, EqualRanges &equal_range) const
|
||||
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (limit >= size() || limit >= equal_range.back().second)
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t n = equal_range.size();
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--n;
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_range[i];
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
@ -566,16 +583,22 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, boo
|
||||
}
|
||||
if (last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, last);
|
||||
|
||||
}
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto& [first, last] = equal_range.back();
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<true>(*this, collator));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
@ -603,7 +626,6 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, boo
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
void ColumnString::protect()
|
||||
|
@ -344,15 +344,19 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
for (const auto& column : columns)
|
||||
{
|
||||
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range);
|
||||
while (limit && !equal_range.empty() && limit <= equal_range.back().first)
|
||||
equal_range.pop_back();
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (equal_range.empty())
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
|
||||
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
|
||||
equal_ranges.pop_back();
|
||||
|
||||
if (equal_ranges.empty())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -382,17 +382,20 @@ int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs,
|
||||
}
|
||||
}
|
||||
|
||||
auto & column_unique = static_cast<const IColumnUnique &>(rhs);
|
||||
const auto & column_unique = static_cast<const IColumnUnique &>(rhs);
|
||||
return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint);
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
void ColumnUnique<ColumnType>::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
void ColumnUnique<ColumnType>::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
bool found_null_value_index = false;
|
||||
for (size_t i = 0; i < equal_range.size() && !found_null_value_index; ++i)
|
||||
for (size_t i = 0; i < equal_ranges.size() && !found_null_value_index; ++i)
|
||||
{
|
||||
auto& [first, last] = equal_range[i];
|
||||
auto & [first, last] = equal_ranges[i];
|
||||
for (auto j = first; j < last; ++j)
|
||||
{
|
||||
if (res[j] == getNullValueIndex())
|
||||
@ -409,14 +412,14 @@ void ColumnUnique<ColumnType>::updatePermutation(bool reverse, size_t limit, int
|
||||
}
|
||||
if (last - first <= 1)
|
||||
{
|
||||
equal_range.erase(equal_range.begin() + i);
|
||||
equal_ranges.erase(equal_ranges.begin() + i);
|
||||
}
|
||||
found_null_value_index = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
getNestedColumn()->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range);
|
||||
getNestedColumn()->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
}
|
||||
|
||||
template <typename IndexType>
|
||||
|
@ -15,8 +15,9 @@
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <ext/bit_cast.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <pdqsort.h>
|
||||
#include <numeric>
|
||||
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
# include <Common/config.h>
|
||||
@ -243,10 +244,14 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
|
||||
template <typename T>
|
||||
void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
if (equal_range.empty())
|
||||
return;
|
||||
|
||||
if (limit >= data.size() || limit >= equal_range.back().second)
|
||||
limit = 0;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
SCOPE_EXIT({equal_range = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < equal_range.size() - bool(limit); ++i)
|
||||
{
|
||||
@ -275,6 +280,12 @@ void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_dire
|
||||
if (limit)
|
||||
{
|
||||
const auto & [first, last] = equal_range.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// From this point on, we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint));
|
||||
else
|
||||
@ -307,7 +318,6 @@ void ColumnVector<T>::updatePermutation(bool reverse, size_t limit, int nan_dire
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
}
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -211,13 +211,11 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
||||
for (const auto & column : columns_with_sort_desc)
|
||||
{
|
||||
while (!ranges.empty() && limit && limit <= ranges.back().first)
|
||||
{
|
||||
ranges.pop_back();
|
||||
}
|
||||
|
||||
if (ranges.empty())
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
column.column->updatePermutation(
|
||||
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
|
||||
}
|
||||
@ -225,9 +223,7 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
||||
|
||||
size_t columns = block.columns();
|
||||
for (size_t i = 0; i < columns; ++i)
|
||||
{
|
||||
block.getByPosition(i).column = block.getByPosition(i).column->permute(perm, limit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
40
tests/queries/0_stateless/01457_order_by_limit.reference
Normal file
40
tests/queries/0_stateless/01457_order_by_limit.reference
Normal file
@ -0,0 +1,40 @@
|
||||
asc nulls last, asc
|
||||
1 1
|
||||
1 2
|
||||
1 3
|
||||
1 4
|
||||
asc nulls first, asc
|
||||
1 1
|
||||
1 2
|
||||
1 3
|
||||
1 4
|
||||
desc nulls last, asc
|
||||
1 1
|
||||
1 2
|
||||
1 3
|
||||
1 4
|
||||
desc nulls first, asc
|
||||
1 1
|
||||
1 2
|
||||
1 3
|
||||
1 4
|
||||
asc nulls last, desc
|
||||
1 8
|
||||
1 7
|
||||
1 6
|
||||
1 5
|
||||
asc nulls first, desc
|
||||
1 8
|
||||
1 7
|
||||
1 6
|
||||
1 5
|
||||
desc nulls last, desc
|
||||
1 8
|
||||
1 7
|
||||
1 6
|
||||
1 5
|
||||
desc nulls first, desc
|
||||
1 8
|
||||
1 7
|
||||
1 6
|
||||
1 5
|
30
tests/queries/0_stateless/01457_order_by_limit.sql
Normal file
30
tests/queries/0_stateless/01457_order_by_limit.sql
Normal file
@ -0,0 +1,30 @@
|
||||
drop table if exists order_by_another;
|
||||
|
||||
create table order_by_another (a Nullable(UInt64), b UInt64) Engine = MergeTree order by tuple();
|
||||
insert into order_by_another values (1, 8), (1, 7), (1, 6), (1, 5), (1, 4), (1, 3), (1, 2), (1, 1);
|
||||
|
||||
select 'asc nulls last, asc';
|
||||
select a, b from order_by_another order by a asc nulls last, b asc limit 4;
|
||||
|
||||
select 'asc nulls first, asc';
|
||||
select a, b from order_by_another order by a asc nulls first, b asc limit 4;
|
||||
|
||||
select 'desc nulls last, asc';
|
||||
select a, b from order_by_another order by a desc nulls last, b asc limit 4;
|
||||
|
||||
select 'desc nulls first, asc';
|
||||
select a, b from order_by_another order by a desc nulls first, b asc limit 4;
|
||||
|
||||
select 'asc nulls last, desc';
|
||||
select a, b from order_by_another order by a asc nulls last, b desc limit 4;
|
||||
|
||||
select 'asc nulls first, desc';
|
||||
select a, b from order_by_another order by a asc nulls first, b desc limit 4;
|
||||
|
||||
select 'desc nulls last, desc';
|
||||
select a, b from order_by_another order by a desc nulls last, b desc limit 4;
|
||||
|
||||
select 'desc nulls first, desc';
|
||||
select a, b from order_by_another order by a desc nulls first, b desc limit 4;
|
||||
|
||||
drop table if exists order_by_another;
|
@ -0,0 +1,76 @@
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
--- DESC NULLS FIRST, ASC
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 1
|
||||
28 0
|
||||
0 0
|
||||
--- DESC NULLS LAST, ASC
|
||||
28 0
|
||||
0 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 1
|
||||
--- ASC NULLS FIRST, ASC
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 1
|
||||
0 0
|
||||
28 0
|
||||
--- ASC NULLS LAST, ASC
|
||||
0 0
|
||||
28 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 1
|
||||
--- DESC NULLS FIRST, DESC
|
||||
\N 1
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
28 0
|
||||
0 0
|
||||
--- DESC NULLS LAST, DESC
|
||||
28 0
|
||||
0 0
|
||||
\N 1
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
--- ASC NULLS FIRST, DESC
|
||||
\N 1
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
0 0
|
||||
28 0
|
||||
--- ASC NULLS LAST, DESC
|
||||
0 0
|
||||
28 0
|
||||
\N 1
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
||||
\N 0
|
96
tests/queries/0_stateless/01457_order_by_nulls_first.sql
Normal file
96
tests/queries/0_stateless/01457_order_by_nulls_first.sql
Normal file
@ -0,0 +1,96 @@
|
||||
drop table if exists order_by_nulls_first;
|
||||
|
||||
CREATE TABLE order_by_nulls_first
|
||||
(diff Nullable(Int16), traf UInt64)
|
||||
ENGINE = MergeTree ORDER BY tuple();
|
||||
|
||||
insert into order_by_nulls_first values (NULL,1),(NULL,0),(NULL,0),(NULL,0),(NULL,0),(NULL,0),(28,0),(0,0);
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
order by diff desc NULLS FIRST, traf
|
||||
limit 1, 4;
|
||||
|
||||
select '--- DESC NULLS FIRST, ASC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff DESC NULLS FIRST,
|
||||
traf ASC;
|
||||
|
||||
select '--- DESC NULLS LAST, ASC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff DESC NULLS LAST,
|
||||
traf ASC;
|
||||
|
||||
select '--- ASC NULLS FIRST, ASC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff ASC NULLS FIRST,
|
||||
traf ASC;
|
||||
|
||||
select '--- ASC NULLS LAST, ASC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff ASC NULLS LAST,
|
||||
traf ASC;
|
||||
|
||||
select '--- DESC NULLS FIRST, DESC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff DESC NULLS FIRST,
|
||||
traf DESC;
|
||||
|
||||
select '--- DESC NULLS LAST, DESC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff DESC NULLS LAST,
|
||||
traf DESC;
|
||||
|
||||
select '--- ASC NULLS FIRST, DESC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff ASC NULLS FIRST,
|
||||
traf DESC;
|
||||
|
||||
select '--- ASC NULLS LAST, DESC';
|
||||
|
||||
SELECT
|
||||
diff,
|
||||
traf
|
||||
FROM order_by_nulls_first
|
||||
ORDER BY
|
||||
diff ASC NULLS LAST,
|
||||
traf DESC;
|
||||
|
||||
drop table if exists order_by_nulls_first;
|
Loading…
Reference in New Issue
Block a user