mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #16273 from Avogar/collation-support
Support collate in LowCardinality, Nullable, Array and Tuple, where nested type is String
This commit is contained in:
commit
4e85d6a4c3
@ -240,6 +240,10 @@ TESTS_TO_SKIP=(
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
|
@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n)
|
||||
offsets_data.resize_assume_reserved(offsets_data.size() - n);
|
||||
}
|
||||
|
||||
|
||||
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
|
||||
int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const
|
||||
{
|
||||
const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_);
|
||||
|
||||
@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
|
||||
size_t rhs_size = rhs.sizeAt(m);
|
||||
size_t min_size = std::min(lhs_size, rhs_size);
|
||||
for (size_t i = 0; i < min_size; ++i)
|
||||
if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint))
|
||||
{
|
||||
int res;
|
||||
if (collator)
|
||||
res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator);
|
||||
else
|
||||
res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint);
|
||||
if (res)
|
||||
return res;
|
||||
}
|
||||
|
||||
return lhs_size < rhs_size
|
||||
? -1
|
||||
@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
|
||||
: 1);
|
||||
}
|
||||
|
||||
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
|
||||
{
|
||||
return compareAtImpl(n, m, rhs_, nan_direction_hint);
|
||||
}
|
||||
|
||||
int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const
|
||||
{
|
||||
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
|
||||
}
|
||||
|
||||
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const
|
||||
@ -352,27 +368,26 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
compare_results, direction, nan_direction_hint);
|
||||
}
|
||||
|
||||
namespace
|
||||
template <bool positive>
|
||||
struct ColumnArray::Cmp
|
||||
{
|
||||
template <bool positive>
|
||||
struct Less
|
||||
const ColumnArray & parent;
|
||||
int nan_direction_hint;
|
||||
const Collator * collator;
|
||||
|
||||
Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr)
|
||||
: parent(parent_), nan_direction_hint(nan_direction_hint_), collator(collator_) {}
|
||||
|
||||
int operator()(size_t lhs, size_t rhs) const
|
||||
{
|
||||
const ColumnArray & parent;
|
||||
int nan_direction_hint;
|
||||
|
||||
Less(const ColumnArray & parent_, int nan_direction_hint_)
|
||||
: parent(parent_), nan_direction_hint(nan_direction_hint_) {}
|
||||
|
||||
bool operator()(size_t lhs, size_t rhs) const
|
||||
{
|
||||
if (positive)
|
||||
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0;
|
||||
else
|
||||
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int res;
|
||||
if (collator)
|
||||
res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator);
|
||||
else
|
||||
res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
|
||||
return positive ? res : -res;
|
||||
}
|
||||
};
|
||||
|
||||
void ColumnArray::reserve(size_t n)
|
||||
{
|
||||
@ -753,7 +768,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
|
||||
|
||||
INSTANTIATE_INDEX_IMPL(ColumnArray)
|
||||
|
||||
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
template <typename Comparator>
|
||||
void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
|
||||
{
|
||||
size_t s = size();
|
||||
if (limit >= s)
|
||||
@ -763,23 +779,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
|
||||
for (size_t i = 0; i < s; ++i)
|
||||
res[i] = i;
|
||||
|
||||
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
|
||||
|
||||
if (limit)
|
||||
{
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(*this, nan_direction_hint));
|
||||
else
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(*this, nan_direction_hint));
|
||||
}
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
|
||||
else
|
||||
{
|
||||
if (reverse)
|
||||
std::sort(res.begin(), res.end(), Less<false>(*this, nan_direction_hint));
|
||||
else
|
||||
std::sort(res.begin(), res.end(), Less<true>(*this, nan_direction_hint));
|
||||
}
|
||||
std::sort(res.begin(), res.end(), less);
|
||||
}
|
||||
|
||||
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
|
||||
template <typename Comparator>
|
||||
void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const
|
||||
{
|
||||
if (equal_range.empty())
|
||||
return;
|
||||
@ -792,20 +801,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
if (limit)
|
||||
--number_of_ranges;
|
||||
|
||||
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
|
||||
|
||||
EqualRanges new_ranges;
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto & [first, last] = equal_range[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, Less<true>(*this, nan_direction_hint));
|
||||
std::sort(res.begin() + first, res.begin() + last, less);
|
||||
auto new_first = first;
|
||||
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
|
||||
if (cmp(res[new_first], res[j]) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
@ -827,14 +835,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<true>(*this, nan_direction_hint));
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
|
||||
if (cmp(res[new_first], res[j]) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
@ -845,7 +850,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
auto new_last = limit;
|
||||
for (auto j = limit; j < last; ++j)
|
||||
{
|
||||
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0)
|
||||
if (cmp(res[new_first], res[j]) == 0)
|
||||
{
|
||||
std::swap(res[new_last], res[j]);
|
||||
++new_last;
|
||||
@ -859,6 +864,39 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
equal_range = std::move(new_ranges);
|
||||
}
|
||||
|
||||
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
if (reverse)
|
||||
getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint));
|
||||
else
|
||||
getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint));
|
||||
|
||||
}
|
||||
|
||||
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
if (reverse)
|
||||
updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint));
|
||||
else
|
||||
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint));
|
||||
}
|
||||
|
||||
void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
if (reverse)
|
||||
getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint, &collator));
|
||||
else
|
||||
getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint, &collator));
|
||||
}
|
||||
|
||||
void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
if (reverse)
|
||||
updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint, &collator));
|
||||
else
|
||||
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
|
||||
}
|
||||
|
||||
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
|
||||
{
|
||||
if (replicate_offsets.empty())
|
||||
|
@ -77,8 +77,11 @@ public:
|
||||
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const override;
|
||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
|
||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
|
||||
void reserve(size_t n) override;
|
||||
size_t byteSize() const override;
|
||||
size_t allocatedBytes() const override;
|
||||
@ -132,6 +135,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isCollationSupported() const override { return getData().isCollationSupported(); }
|
||||
|
||||
private:
|
||||
WrappedPtr data;
|
||||
WrappedPtr offsets;
|
||||
@ -169,6 +174,17 @@ private:
|
||||
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
|
||||
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
|
||||
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
|
||||
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
template <typename Comparator>
|
||||
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
|
||||
|
||||
template <typename Comparator>
|
||||
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
|
||||
|
||||
template <bool positive>
|
||||
struct Cmp;
|
||||
};
|
||||
|
||||
|
||||
|
@ -248,6 +248,8 @@ public:
|
||||
/// The constant value. It is valid even if the size of the column is 0.
|
||||
template <typename T>
|
||||
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
|
||||
|
||||
bool isCollationSupported() const override { return data->isCollationSupported(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
@ -278,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
|
||||
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
|
||||
}
|
||||
|
||||
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
|
||||
int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
|
||||
{
|
||||
const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs);
|
||||
size_t n_index = getIndexes().getUInt(n);
|
||||
size_t m_index = low_cardinality_column.getIndexes().getUInt(m);
|
||||
if (collator)
|
||||
return getDictionary().getNestedColumn()->compareAtWithCollation(n_index, m_index, *low_cardinality_column.getDictionary().getNestedColumn(), nan_direction_hint, *collator);
|
||||
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
|
||||
}
|
||||
|
||||
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
|
||||
{
|
||||
return compareAtImpl(n, m, rhs, nan_direction_hint);
|
||||
}
|
||||
|
||||
int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
|
||||
{
|
||||
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const
|
||||
@ -295,14 +308,17 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num
|
||||
compare_results, direction, nan_direction_hint);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const
|
||||
{
|
||||
if (limit == 0)
|
||||
limit = size();
|
||||
|
||||
size_t unique_limit = getDictionary().size();
|
||||
Permutation unique_perm;
|
||||
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
|
||||
if (collator)
|
||||
getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm);
|
||||
else
|
||||
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
|
||||
|
||||
/// TODO: optimize with sse.
|
||||
|
||||
@ -330,7 +346,8 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
template <typename Cmp>
|
||||
void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
@ -345,20 +362,17 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
EqualRanges new_ranges;
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
|
||||
std::sort(res.begin() + first, res.begin() + last, less);
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
|
||||
if (comparator(res[new_first], res[j]) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
@ -379,17 +393,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
|
||||
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
|
||||
auto new_first = first;
|
||||
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0)
|
||||
if (comparator(res[new_first],res[j]) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
@ -401,7 +410,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
auto new_last = limit;
|
||||
for (auto j = limit; j < last; ++j)
|
||||
{
|
||||
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0)
|
||||
if (comparator(res[new_first], res[j]) == 0)
|
||||
{
|
||||
std::swap(res[new_last], res[j]);
|
||||
++new_last;
|
||||
@ -412,6 +421,38 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
getPermutationImpl(reverse, limit, nan_direction_hint, res);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
auto comparator = [this, nan_direction_hint, reverse](size_t lhs, size_t rhs)
|
||||
{
|
||||
int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), getDictionary(), nan_direction_hint);
|
||||
return reverse ? -ret : ret;
|
||||
};
|
||||
|
||||
updatePermutationImpl(limit, res, equal_ranges, comparator);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator);
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
auto comparator = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs)
|
||||
{
|
||||
int ret = getDictionary().getNestedColumn()->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *getDictionary().getNestedColumn(), nan_direction_hint, collator);
|
||||
return reverse ? -ret : ret;
|
||||
};
|
||||
|
||||
updatePermutationImpl(limit, res, equal_ranges, comparator);
|
||||
}
|
||||
|
||||
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
|
||||
{
|
||||
auto columns = getIndexes().scatter(num_columns, selector);
|
||||
|
@ -125,10 +125,16 @@ public:
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const override;
|
||||
|
||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
|
||||
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
|
||||
void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override;
|
||||
|
||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
|
||||
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & offsets) const override
|
||||
{
|
||||
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
|
||||
@ -170,6 +176,7 @@ public:
|
||||
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
|
||||
bool isNumeric() const override { return getDictionary().isNumeric(); }
|
||||
bool lowCardinality() const override { return true; }
|
||||
bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); }
|
||||
|
||||
/**
|
||||
* Checks if the dictionary column is Nullable(T).
|
||||
@ -309,6 +316,13 @@ private:
|
||||
|
||||
void compactInplace();
|
||||
void compactIfSharedDictionary();
|
||||
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
|
||||
|
||||
template <typename Cmp>
|
||||
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/WeakHash.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
|
||||
@ -223,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
|
||||
return ColumnNullable::create(indexed_data, indexed_null_map);
|
||||
}
|
||||
|
||||
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
|
||||
int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const
|
||||
{
|
||||
/// NULL values share the properties of NaN values.
|
||||
/// Here the last parameter of compareAt is called null_direction_hint
|
||||
@ -245,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null
|
||||
}
|
||||
|
||||
const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
|
||||
if (collator)
|
||||
return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator);
|
||||
|
||||
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
|
||||
}
|
||||
|
||||
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
|
||||
{
|
||||
return compareAtImpl(n, m, rhs_, null_direction_hint);
|
||||
}
|
||||
|
||||
int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const
|
||||
{
|
||||
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
|
||||
}
|
||||
|
||||
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const
|
||||
@ -256,10 +270,14 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
compare_results, direction, nan_direction_hint);
|
||||
}
|
||||
|
||||
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
|
||||
void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
|
||||
{
|
||||
/// Cannot pass limit because of unknown amount of NULLs.
|
||||
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
|
||||
|
||||
if (collator)
|
||||
getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res);
|
||||
else
|
||||
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
|
||||
|
||||
if ((null_direction_hint > 0) != reverse)
|
||||
{
|
||||
@ -329,7 +347,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
@ -432,12 +450,35 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire
|
||||
}
|
||||
}
|
||||
|
||||
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
|
||||
if (collator)
|
||||
getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges);
|
||||
else
|
||||
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
|
||||
|
||||
equal_ranges = std::move(new_ranges);
|
||||
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
|
||||
}
|
||||
|
||||
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
|
||||
{
|
||||
getPermutationImpl(reverse, limit, null_direction_hint, res);
|
||||
}
|
||||
|
||||
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges);
|
||||
}
|
||||
|
||||
void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
|
||||
{
|
||||
getPermutationImpl(reverse, limit, null_direction_hint, res, &collator);
|
||||
}
|
||||
|
||||
void ColumnNullable::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const
|
||||
{
|
||||
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator);
|
||||
}
|
||||
|
||||
void ColumnNullable::gather(ColumnGathererStream & gatherer)
|
||||
{
|
||||
gatherer.gather(*this);
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
class Collator;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -92,8 +93,12 @@ public:
|
||||
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const override;
|
||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
|
||||
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
|
||||
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
|
||||
void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
|
||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
|
||||
void updatePermutationWithCollation(
|
||||
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
|
||||
void reserve(size_t n) override;
|
||||
size_t byteSize() const override;
|
||||
size_t allocatedBytes() const override;
|
||||
@ -129,6 +134,7 @@ public:
|
||||
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
|
||||
size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); }
|
||||
bool onlyNull() const override { return nested_column->isDummy(); }
|
||||
bool isCollationSupported() const override { return nested_column->isCollationSupported(); }
|
||||
|
||||
|
||||
/// Return the column that represents values.
|
||||
@ -164,6 +170,13 @@ private:
|
||||
|
||||
template <bool negative>
|
||||
void applyNullMapImpl(const ColumnUInt8 & map);
|
||||
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
|
||||
|
||||
void updatePermutationImpl(
|
||||
bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
|
||||
};
|
||||
|
||||
ColumnPtr makeNullable(const ColumnPtr & column);
|
||||
|
@ -285,21 +285,22 @@ void ColumnString::compareColumn(
|
||||
}
|
||||
|
||||
template <bool positive>
|
||||
struct ColumnString::less
|
||||
struct ColumnString::Cmp
|
||||
{
|
||||
const ColumnString & parent;
|
||||
explicit less(const ColumnString & parent_) : parent(parent_) {}
|
||||
bool operator()(size_t lhs, size_t rhs) const
|
||||
explicit Cmp(const ColumnString & parent_) : parent(parent_) {}
|
||||
int operator()(size_t lhs, size_t rhs) const
|
||||
{
|
||||
int res = memcmpSmallAllowOverflow15(
|
||||
parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
|
||||
parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
|
||||
|
||||
return positive ? (res < 0) : (res > 0);
|
||||
return positive ? res : -res;
|
||||
}
|
||||
};
|
||||
|
||||
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
|
||||
template <typename Comparator>
|
||||
void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
|
||||
{
|
||||
size_t s = offsets.size();
|
||||
res.resize(s);
|
||||
@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio
|
||||
if (limit >= s)
|
||||
limit = 0;
|
||||
|
||||
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
|
||||
|
||||
if (limit)
|
||||
{
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
|
||||
else
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
|
||||
}
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
|
||||
else
|
||||
{
|
||||
if (reverse)
|
||||
std::sort(res.begin(), res.end(), less<false>(*this));
|
||||
else
|
||||
std::sort(res.begin(), res.end(), less<true>(*this));
|
||||
}
|
||||
std::sort(res.begin(), res.end(), less);
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
|
||||
template <typename Comparator>
|
||||
void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
if (limit)
|
||||
--number_of_ranges;
|
||||
|
||||
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto & [first, last] = equal_ranges[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
|
||||
std::sort(res.begin() + first, res.begin() + last, less);
|
||||
|
||||
size_t new_first = first;
|
||||
for (size_t j = first + 1; j < last; ++j)
|
||||
{
|
||||
if (memcmpSmallAllowOverflow15(
|
||||
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
|
||||
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
|
||||
if (cmp(res[j], res[new_first]) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
|
||||
|
||||
size_t new_first = first;
|
||||
for (size_t j = first + 1; j < limit; ++j)
|
||||
{
|
||||
if (memcmpSmallAllowOverflow15(
|
||||
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
|
||||
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
|
||||
if (cmp(res[j], res[new_first]) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
size_t new_last = limit;
|
||||
for (size_t j = limit; j < last; ++j)
|
||||
{
|
||||
if (memcmpSmallAllowOverflow15(
|
||||
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
|
||||
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0)
|
||||
if (cmp(res[j], res[new_first]) == 0)
|
||||
{
|
||||
std::swap(res[j], res[new_last]);
|
||||
++new_last;
|
||||
@ -408,6 +391,56 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
|
||||
{
|
||||
if (reverse)
|
||||
getPermutationImpl(limit, res, Cmp<false>(*this));
|
||||
else
|
||||
getPermutationImpl(limit, res, Cmp<true>(*this));
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (reverse)
|
||||
updatePermutationImpl(limit, res, equal_ranges, Cmp<false>(*this));
|
||||
else
|
||||
updatePermutationImpl(limit, res, equal_ranges, Cmp<true>(*this));
|
||||
}
|
||||
|
||||
template <bool positive>
|
||||
struct ColumnString::CmpWithCollation
|
||||
{
|
||||
const ColumnString & parent;
|
||||
const Collator & collator;
|
||||
|
||||
CmpWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {}
|
||||
|
||||
int operator()(size_t lhs, size_t rhs) const
|
||||
{
|
||||
int res = collator.compare(
|
||||
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs),
|
||||
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs));
|
||||
|
||||
return positive ? res : -res;
|
||||
}
|
||||
};
|
||||
|
||||
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const
|
||||
{
|
||||
if (reverse)
|
||||
getPermutationImpl(limit, res, CmpWithCollation<false>(*this, collator));
|
||||
else
|
||||
getPermutationImpl(limit, res, CmpWithCollation<true>(*this, collator));
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (reverse)
|
||||
updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<false>(*this, collator));
|
||||
else
|
||||
updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<true>(*this, collator));
|
||||
}
|
||||
|
||||
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
|
||||
{
|
||||
size_t col_size = size();
|
||||
@ -476,13 +509,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const
|
||||
size_t min_idx = 0;
|
||||
size_t max_idx = 0;
|
||||
|
||||
less<true> less_op(*this);
|
||||
Cmp<true> cmp_op(*this);
|
||||
|
||||
for (size_t i = 1; i < col_size; ++i)
|
||||
{
|
||||
if (less_op(i, min_idx))
|
||||
if (cmp_op(i, min_idx) < 0)
|
||||
min_idx = i;
|
||||
else if (less_op(max_idx, i))
|
||||
else if (cmp_op(max_idx, i) < 0)
|
||||
max_idx = i;
|
||||
}
|
||||
|
||||
@ -491,7 +524,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const
|
||||
}
|
||||
|
||||
|
||||
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const
|
||||
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
|
||||
{
|
||||
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
|
||||
|
||||
@ -500,134 +533,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs
|
||||
reinterpret_cast<const char *>(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m));
|
||||
}
|
||||
|
||||
|
||||
template <bool positive>
|
||||
struct ColumnString::lessWithCollation
|
||||
{
|
||||
const ColumnString & parent;
|
||||
const Collator & collator;
|
||||
|
||||
lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {}
|
||||
|
||||
bool operator()(size_t lhs, size_t rhs) const
|
||||
{
|
||||
int res = collator.compare(
|
||||
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs),
|
||||
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs));
|
||||
|
||||
return positive ? (res < 0) : (res > 0);
|
||||
}
|
||||
};
|
||||
|
||||
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
|
||||
{
|
||||
size_t s = offsets.size();
|
||||
res.resize(s);
|
||||
for (size_t i = 0; i < s; ++i)
|
||||
res[i] = i;
|
||||
|
||||
if (limit >= s)
|
||||
limit = 0;
|
||||
|
||||
if (limit)
|
||||
{
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<true>(*this, collator));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (reverse)
|
||||
std::sort(res.begin(), res.end(), lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
std::sort(res.begin(), res.end(), lessWithCollation<true>(*this, collator));
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
if (limit >= size() || limit >= equal_ranges.back().second)
|
||||
limit = 0;
|
||||
|
||||
size_t number_of_ranges = equal_ranges.size();
|
||||
if (limit)
|
||||
--number_of_ranges;
|
||||
|
||||
EqualRanges new_ranges;
|
||||
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
|
||||
|
||||
for (size_t i = 0; i < number_of_ranges; ++i)
|
||||
{
|
||||
const auto& [first, last] = equal_ranges[i];
|
||||
|
||||
if (reverse)
|
||||
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<true>(*this, collator));
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < last; ++j)
|
||||
{
|
||||
if (collator.compare(
|
||||
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
|
||||
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
|
||||
new_first = j;
|
||||
}
|
||||
}
|
||||
if (last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, last);
|
||||
}
|
||||
|
||||
if (limit)
|
||||
{
|
||||
const auto & [first, last] = equal_ranges.back();
|
||||
|
||||
if (limit < first || limit > last)
|
||||
return;
|
||||
|
||||
/// Since then we are working inside the interval.
|
||||
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<false>(*this, collator));
|
||||
else
|
||||
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<true>(*this, collator));
|
||||
|
||||
auto new_first = first;
|
||||
for (auto j = first + 1; j < limit; ++j)
|
||||
{
|
||||
if (collator.compare(
|
||||
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
|
||||
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
|
||||
{
|
||||
if (j - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, j);
|
||||
|
||||
new_first = j;
|
||||
}
|
||||
}
|
||||
auto new_last = limit;
|
||||
for (auto j = limit; j < last; ++j)
|
||||
{
|
||||
if (collator.compare(
|
||||
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
|
||||
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0)
|
||||
{
|
||||
std::swap(res[new_last], res[j]);
|
||||
++new_last;
|
||||
}
|
||||
}
|
||||
if (new_last - new_first > 1)
|
||||
new_ranges.emplace_back(new_first, new_last);
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnString::protect()
|
||||
{
|
||||
getChars().protect();
|
||||
|
@ -43,14 +43,20 @@ private:
|
||||
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
|
||||
|
||||
template <bool positive>
|
||||
struct less;
|
||||
struct Cmp;
|
||||
|
||||
template <bool positive>
|
||||
struct lessWithCollation;
|
||||
struct CmpWithCollation;
|
||||
|
||||
ColumnString() = default;
|
||||
ColumnString(const ColumnString & src);
|
||||
|
||||
template <typename Comparator>
|
||||
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
|
||||
|
||||
template <typename Comparator>
|
||||
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const;
|
||||
|
||||
public:
|
||||
const char * getFamilyName() const override { return "String"; }
|
||||
TypeIndex getDataType() const override { return TypeIndex::String; }
|
||||
@ -229,16 +235,16 @@ public:
|
||||
int direction, int nan_direction_hint) const override;
|
||||
|
||||
/// Variant of compareAt for string comparison with respect to collation.
|
||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
|
||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
|
||||
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
|
||||
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
|
||||
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
/// Sorting with respect to collation.
|
||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const;
|
||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override;
|
||||
|
||||
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const;
|
||||
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
|
||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||
|
||||
@ -270,6 +276,8 @@ public:
|
||||
|
||||
// Throws an exception if offsets/chars are messed up
|
||||
void validate() const;
|
||||
|
||||
/// String columns can always be compared and sorted with a collator.
bool isCollationSupported() const override { return true; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se
|
||||
return res;
|
||||
}
|
||||
|
||||
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
|
||||
int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
|
||||
{
|
||||
const size_t tuple_size = columns.size();
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
if (int res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint))
|
||||
{
|
||||
int res;
|
||||
if (collator && columns[i]->isCollationSupported())
|
||||
res = columns[i]->compareAtWithCollation(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint, *collator);
|
||||
else
|
||||
res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint);
|
||||
if (res)
|
||||
return res;
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    /// Plain comparison: the shared implementation receives no collator.
    const Collator * const no_collator = nullptr;
    return compareAtImpl(n, m, rhs, nan_direction_hint, no_collator);
}
|
||||
|
||||
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const
|
||||
@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
compare_results, direction, nan_direction_hint);
|
||||
}
|
||||
|
||||
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
    /// The shared implementation takes the collator by pointer (null meaning "no collation").
    const Collator * collator_ptr = &collator;
    return compareAtImpl(n, m, rhs, nan_direction_hint, collator_ptr);
}
|
||||
|
||||
template <bool positive>
|
||||
struct ColumnTuple::Less
|
||||
{
|
||||
TupleColumns columns;
|
||||
int nan_direction_hint;
|
||||
const Collator * collator;
|
||||
|
||||
Less(const TupleColumns & columns_, int nan_direction_hint_)
|
||||
: columns(columns_), nan_direction_hint(nan_direction_hint_)
|
||||
Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr)
|
||||
: columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -308,7 +325,11 @@ struct ColumnTuple::Less
|
||||
{
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
int res = column->compareAt(a, b, *column, nan_direction_hint);
|
||||
int res;
|
||||
if (collator && column->isCollationSupported())
|
||||
res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator);
|
||||
else
|
||||
res = column->compareAt(a, b, *column, nan_direction_hint);
|
||||
if (res < 0)
|
||||
return positive;
|
||||
else if (res > 0)
|
||||
@ -318,7 +339,8 @@ struct ColumnTuple::Less
|
||||
}
|
||||
};
|
||||
|
||||
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
template <typename LessOperator>
|
||||
void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const
|
||||
{
|
||||
size_t rows = size();
|
||||
res.resize(rows);
|
||||
@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
|
||||
|
||||
if (limit)
|
||||
{
|
||||
if (reverse)
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(columns, nan_direction_hint));
|
||||
else
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(columns, nan_direction_hint));
|
||||
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (reverse)
|
||||
std::sort(res.begin(), res.end(), Less<false>(columns, nan_direction_hint));
|
||||
else
|
||||
std::sort(res.begin(), res.end(), Less<true>(columns, nan_direction_hint));
|
||||
std::sort(res.begin(), res.end(), less);
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
|
||||
{
|
||||
if (equal_ranges.empty())
|
||||
return;
|
||||
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
if (collator && column->isCollationSupported())
|
||||
column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
else
|
||||
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
|
||||
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
|
||||
equal_ranges.pop_back();
|
||||
@ -361,6 +380,32 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
if (reverse)
|
||||
getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint));
|
||||
else
|
||||
getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint));
|
||||
}
|
||||
|
||||
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges);
|
||||
}
|
||||
|
||||
void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
||||
{
|
||||
if (reverse)
|
||||
getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint, &collator));
|
||||
else
|
||||
getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint, &collator));
|
||||
}
|
||||
|
||||
void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
|
||||
{
|
||||
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator);
|
||||
}
|
||||
|
||||
void ColumnTuple::gather(ColumnGathererStream & gatherer)
|
||||
{
|
||||
gatherer.gather(*this);
|
||||
@ -433,5 +478,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ColumnTuple::isCollationSupported() const
|
||||
{
|
||||
for (const auto& column : columns)
|
||||
{
|
||||
if (column->isCollationSupported())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -75,15 +75,19 @@ public:
|
||||
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
||||
int direction, int nan_direction_hint) const override;
|
||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override;
|
||||
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
|
||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
||||
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
|
||||
void reserve(size_t n) override;
|
||||
size_t byteSize() const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void protect() override;
|
||||
void forEachSubcolumn(ColumnCallback callback) override;
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
bool isCollationSupported() const override;
|
||||
|
||||
size_t tupleSize() const { return columns.size(); }
|
||||
|
||||
@ -94,6 +98,15 @@ public:
|
||||
Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }
|
||||
|
||||
const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
|
||||
|
||||
private:
|
||||
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
|
||||
|
||||
template <typename LessOperator>
|
||||
void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const;
|
||||
|
||||
void updatePermutationImpl(
|
||||
bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const;
|
||||
};
|
||||
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
|
||||
class SipHash;
|
||||
|
||||
class Collator;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -18,6 +18,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_GET_SIZE_OF_FIELD;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int BAD_COLLATION;
|
||||
}
|
||||
|
||||
class Arena;
|
||||
@ -250,6 +251,12 @@ public:
|
||||
*/
|
||||
virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
|
||||
|
||||
/// Equivalent to compareAt, but collator is used to compare values.
|
||||
virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const
|
||||
{
|
||||
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing it.", ErrorCodes::BAD_COLLATION);
|
||||
}
|
||||
|
||||
/// Compare the whole column with single value from rhs column.
|
||||
/// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare.
|
||||
/// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction
|
||||
@ -277,6 +284,18 @@ public:
|
||||
*/
|
||||
virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0;
|
||||
|
||||
/** Equivalent to getPermutation and updatePermutation but collator is used to compare values.
|
||||
* Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them.
|
||||
*/
|
||||
/// Default implementation for columns that do not override it: always throws BAD_COLLATION.
virtual void getPermutationWithCollation(const Collator &, bool, size_t, int, Permutation &) const
|
||||
{
|
||||
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
|
||||
}
|
||||
/// Default implementation for columns that do not override it: always throws BAD_COLLATION.
virtual void updatePermutationWithCollation(const Collator &, bool, size_t, int, Permutation &, EqualRanges&) const
|
||||
{
|
||||
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
|
||||
}
|
||||
|
||||
/** Copies each element according to the offsets parameter.
|
||||
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
||||
* It is necessary in ARRAY JOIN operation.
|
||||
@ -402,6 +421,8 @@ public:
|
||||
|
||||
virtual bool lowCardinality() const { return false; }
|
||||
|
||||
/// Whether the *WithCollation methods may be used on this column. Off by default; column types opt in by overriding.
virtual bool isCollationSupported() const { return false; }
|
||||
|
||||
virtual ~IColumn() = default;
|
||||
IColumn() = default;
|
||||
IColumn(const IColumn &) = default;
|
||||
|
@ -96,7 +96,7 @@ struct SortCursorImpl
|
||||
: column_desc.column_number;
|
||||
sort_columns.push_back(columns[column_number].get());
|
||||
|
||||
need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String)
|
||||
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String)
|
||||
has_collation |= need_collation[j];
|
||||
}
|
||||
|
||||
@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
|
||||
int nulls_direction = desc.nulls_direction;
|
||||
int res;
|
||||
if (impl->need_collation[i])
|
||||
{
|
||||
const ColumnString & column_string = assert_cast<const ColumnString &>(*impl->sort_columns[i]);
|
||||
res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator);
|
||||
}
|
||||
res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator);
|
||||
else
|
||||
res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction);
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
@ -86,8 +88,7 @@ struct PartialSortingLessWithCollation
|
||||
}
|
||||
else if (isCollationRequired(elem.description))
|
||||
{
|
||||
const ColumnString & column_string = assert_cast<const ColumnString &>(*elem.column);
|
||||
res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator);
|
||||
res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator);
|
||||
}
|
||||
else
|
||||
res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction);
|
||||
@ -101,7 +102,6 @@ struct PartialSortingLessWithCollation
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
||||
{
|
||||
if (!block)
|
||||
@ -120,14 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
||||
bool is_column_const = false;
|
||||
if (isCollationRequired(description[0]))
|
||||
{
|
||||
/// it it's real string column, than we need sort
|
||||
if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column))
|
||||
column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm);
|
||||
else if (checkAndGetColumnConstData<ColumnString>(column))
|
||||
if (!column->isCollationSupported())
|
||||
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
|
||||
|
||||
if (isColumnConst(*column))
|
||||
is_column_const = true;
|
||||
else
|
||||
throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION);
|
||||
|
||||
column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
|
||||
}
|
||||
else if (!isColumnConst(*column))
|
||||
{
|
||||
@ -163,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
||||
const IColumn * column = columns_with_sort_desc[i].column;
|
||||
if (isCollationRequired(description[i]))
|
||||
{
|
||||
if (!checkAndGetColumn<ColumnString>(column) && !checkAndGetColumnConstData<ColumnString>(column))
|
||||
throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION);
|
||||
if (!column->isCollationSupported())
|
||||
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
|
||||
|
||||
need_collation = true;
|
||||
}
|
||||
@ -187,10 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
|
||||
|
||||
if (isCollationRequired(column.description))
|
||||
{
|
||||
const ColumnString & column_string = assert_cast<const ColumnString &>(*column.column);
|
||||
column_string.updatePermutationWithCollation(
|
||||
*column.description.collator,
|
||||
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
|
||||
column.column->updatePermutationWithCollation(
|
||||
*column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -0,0 +1,64 @@
|
||||
Order by without collate
|
||||
1 Ё
|
||||
2 А
|
||||
2 Я
|
||||
1 а
|
||||
2 я
|
||||
1 ё
|
||||
Order by with collate
|
||||
1 а
|
||||
2 А
|
||||
1 ё
|
||||
1 Ё
|
||||
2 я
|
||||
2 Я
|
||||
Order by tuple without collate
|
||||
1 Ё
|
||||
1 а
|
||||
1 ё
|
||||
2 А
|
||||
2 Я
|
||||
2 я
|
||||
Order by tuple with collate
|
||||
1 а
|
||||
1 ё
|
||||
1 Ё
|
||||
2 А
|
||||
2 я
|
||||
2 Я
|
||||
Order by without collate
|
||||
1 Ё
|
||||
2 А
|
||||
2 Я
|
||||
1 а
|
||||
2 я
|
||||
1 ё
|
||||
1 \N
|
||||
2 \N
|
||||
Order by with collate
|
||||
1 а
|
||||
2 А
|
||||
1 ё
|
||||
1 Ё
|
||||
2 я
|
||||
2 Я
|
||||
1 \N
|
||||
2 \N
|
||||
Order by tuple without collate
|
||||
1 Ё
|
||||
1 а
|
||||
1 ё
|
||||
1 \N
|
||||
2 А
|
||||
2 Я
|
||||
2 я
|
||||
2 \N
|
||||
Order by tuple with collate
|
||||
1 а
|
||||
1 ё
|
||||
1 Ё
|
||||
1 \N
|
||||
2 А
|
||||
2 я
|
||||
2 Я
|
||||
2 \N
|
@ -0,0 +1,33 @@
|
||||
DROP TABLE IF EXISTS test_collate;
|
||||
DROP TABLE IF EXISTS test_collate_null;
|
||||
|
||||
CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory();
|
||||
CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory();
|
||||
|
||||
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я');
|
||||
INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null);
|
||||
|
||||
|
||||
SELECT 'Order by without collate';
|
||||
SELECT * FROM test_collate ORDER BY s;
|
||||
SELECT 'Order by with collate';
|
||||
SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
|
||||
|
||||
SELECT 'Order by tuple without collate';
|
||||
SELECT * FROM test_collate ORDER BY x, s;
|
||||
SELECT 'Order by tuple with collate';
|
||||
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
|
||||
|
||||
SELECT 'Order by without collate';
|
||||
SELECT * FROM test_collate_null ORDER BY s;
|
||||
SELECT 'Order by with collate';
|
||||
SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru';
|
||||
|
||||
SELECT 'Order by tuple without collate';
|
||||
SELECT * FROM test_collate_null ORDER BY x, s;
|
||||
SELECT 'Order by tuple with collate';
|
||||
SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru';
|
||||
|
||||
|
||||
DROP TABLE test_collate;
|
||||
DROP TABLE test_collate_null;
|
@ -0,0 +1,36 @@
|
||||
Order by without collate
|
||||
1 Ё
|
||||
2 А
|
||||
2 Я
|
||||
1 а
|
||||
2 я
|
||||
1 ё
|
||||
1 \N
|
||||
2 \N
|
||||
Order by with collate
|
||||
1 а
|
||||
2 А
|
||||
1 ё
|
||||
1 Ё
|
||||
2 я
|
||||
2 Я
|
||||
1 \N
|
||||
2 \N
|
||||
Order by tuple without collate
|
||||
1 Ё
|
||||
1 а
|
||||
1 ё
|
||||
1 \N
|
||||
2 А
|
||||
2 Я
|
||||
2 я
|
||||
2 \N
|
||||
Order by tuple with collate
|
||||
1 а
|
||||
1 ё
|
||||
1 Ё
|
||||
1 \N
|
||||
2 А
|
||||
2 я
|
||||
2 Я
|
||||
2 \N
|
18
tests/queries/0_stateless/01533_collate_in_nullable.sql
Normal file
18
tests/queries/0_stateless/01533_collate_in_nullable.sql
Normal file
@ -0,0 +1,18 @@
|
||||
DROP TABLE IF EXISTS test_collate;
|
||||
|
||||
CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory();
|
||||
|
||||
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null);
|
||||
|
||||
SELECT 'Order by without collate';
|
||||
SELECT * FROM test_collate ORDER BY s;
|
||||
SELECT 'Order by with collate';
|
||||
SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
|
||||
|
||||
SELECT 'Order by tuple without collate';
|
||||
SELECT * FROM test_collate ORDER BY x, s;
|
||||
SELECT 'Order by tuple with collate';
|
||||
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
|
||||
|
||||
DROP TABLE test_collate;
|
||||
|
50
tests/queries/0_stateless/01542_collate_in_array.reference
Normal file
50
tests/queries/0_stateless/01542_collate_in_array.reference
Normal file
@ -0,0 +1,50 @@
|
||||
1 ['а']
|
||||
2 ['А']
|
||||
1 ['ё']
|
||||
1 ['ё','а']
|
||||
2 ['ё','а','а']
|
||||
1 ['ё','я']
|
||||
1 ['Ё']
|
||||
2 ['я','а']
|
||||
2 ['Я']
|
||||
|
||||
1 ['а']
|
||||
1 ['ё']
|
||||
1 ['ё','а']
|
||||
1 ['ё','я']
|
||||
1 ['Ё']
|
||||
2 ['А']
|
||||
2 ['ё','а','а']
|
||||
2 ['я','а']
|
||||
2 ['Я']
|
||||
|
||||
1 ['а']
|
||||
2 ['А']
|
||||
1 ['ё']
|
||||
1 ['ё','а']
|
||||
2 ['ё','а','а',NULL]
|
||||
1 ['ё',NULL,'я']
|
||||
1 ['Ё']
|
||||
2 ['я']
|
||||
2 [NULL,'Я']
|
||||
|
||||
1 ['а']
|
||||
1 ['ё']
|
||||
1 ['ё','а']
|
||||
1 ['ё',NULL,'я']
|
||||
1 ['Ё']
|
||||
2 ['А']
|
||||
2 ['ё','а','а',NULL]
|
||||
2 ['я']
|
||||
2 [NULL,'Я']
|
||||
|
||||
2 [['а','а'],['я','ё']]
|
||||
1 [['а','Ё'],['ё','я']]
|
||||
1 [['а','я'],['а','ё']]
|
||||
2 [['ё']]
|
||||
|
||||
1 [['а','Ё'],['ё','я']]
|
||||
1 [['а','я'],['а','ё']]
|
||||
2 [['а','а'],['я','ё']]
|
||||
2 [['ё']]
|
||||
|
34
tests/queries/0_stateless/01542_collate_in_array.sql
Normal file
34
tests/queries/0_stateless/01542_collate_in_array.sql
Normal file
@ -0,0 +1,34 @@
|
||||
DROP TABLE IF EXISTS collate_test1;
|
||||
DROP TABLE IF EXISTS collate_test2;
|
||||
DROP TABLE IF EXISTS collate_test3;
|
||||
|
||||
CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory();
|
||||
CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory();
|
||||
CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory();
|
||||
|
||||
INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 'я']), (2, ['ё', 'а', 'а']);
|
||||
INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]);
|
||||
INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]);
|
||||
|
||||
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
DROP TABLE collate_test1;
|
||||
DROP TABLE collate_test2;
|
||||
DROP TABLE collate_test3;
|
||||
|
60
tests/queries/0_stateless/01543_collate_in_tuple.reference
Normal file
60
tests/queries/0_stateless/01543_collate_in_tuple.reference
Normal file
@ -0,0 +1,60 @@
|
||||
1 (1,'а')
|
||||
1 (1,'ё')
|
||||
1 (1,'Ё')
|
||||
2 (1,'я')
|
||||
1 (2,'а')
|
||||
2 (2,'А')
|
||||
2 (2,'Я')
|
||||
1 (3,'я')
|
||||
|
||||
1 (1,'а')
|
||||
1 (1,'ё')
|
||||
1 (1,'Ё')
|
||||
1 (2,'а')
|
||||
1 (3,'я')
|
||||
2 (1,'я')
|
||||
2 (2,'А')
|
||||
2 (2,'Я')
|
||||
|
||||
1 (1,'а')
|
||||
1 (1,'ё')
|
||||
1 (1,'Ё')
|
||||
2 (1,'я')
|
||||
1 (1,NULL)
|
||||
2 (2,'А')
|
||||
2 (2,'Я')
|
||||
1 (2,NULL)
|
||||
2 (2,NULL)
|
||||
1 (3,'я')
|
||||
|
||||
1 (1,'а')
|
||||
1 (1,'ё')
|
||||
1 (1,'Ё')
|
||||
1 (1,NULL)
|
||||
1 (2,NULL)
|
||||
1 (3,'я')
|
||||
2 (1,'я')
|
||||
2 (2,'А')
|
||||
2 (2,'Я')
|
||||
2 (2,NULL)
|
||||
|
||||
2 (1,(1,['А']))
|
||||
2 (1,(1,['ё','а','а']))
|
||||
1 (1,(1,['Ё']))
|
||||
2 (1,(1,['Я']))
|
||||
1 (1,(2,['а']))
|
||||
1 (1,(2,['ё','я']))
|
||||
1 (2,(1,['ё']))
|
||||
1 (2,(1,['ё','а']))
|
||||
2 (2,(1,['я']))
|
||||
|
||||
1 (1,(1,['Ё']))
|
||||
1 (1,(2,['а']))
|
||||
1 (1,(2,['ё','я']))
|
||||
1 (2,(1,['ё']))
|
||||
1 (2,(1,['ё','а']))
|
||||
2 (1,(1,['А']))
|
||||
2 (1,(1,['ё','а','а']))
|
||||
2 (1,(1,['Я']))
|
||||
2 (2,(1,['я']))
|
||||
|
34
tests/queries/0_stateless/01543_collate_in_tuple.sql
Normal file
34
tests/queries/0_stateless/01543_collate_in_tuple.sql
Normal file
@ -0,0 +1,34 @@
|
||||
DROP TABLE IF EXISTS collate_test1;
|
||||
DROP TABLE IF EXISTS collate_test2;
|
||||
DROP TABLE IF EXISTS collate_test3;
|
||||
|
||||
CREATE TABLE collate_test1 (x UInt32, s Tuple(UInt32, String)) ENGINE=Memory();
|
||||
CREATE TABLE collate_test2 (x UInt32, s Tuple(UInt32, LowCardinality(Nullable(String)))) ENGINE=Memory();
|
||||
CREATE TABLE collate_test3 (x UInt32, s Tuple(UInt32, Tuple(UInt32, Array(String)))) ENGINE=Memory();
|
||||
|
||||
INSERT INTO collate_test1 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2,'а')), (1, (3, 'я'));
|
||||
INSERT INTO collate_test2 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2, null)), (1, (3, 'я')), (1, (1, null)), (2, (2, null));
|
||||
INSERT INTO collate_test3 VALUES (1, (1, (1, ['Ё']))), (1, (2, (1, ['ё']))), (1, (1, (2, ['а']))), (2, (1, (1, ['А']))), (2, (2, (1, ['я']))), (2, (1, (1, ['Я']))), (1, (2, (1, ['ё','а']))), (1, (1, (2, ['ё', 'я']))), (2, (1, (1, ['ё', 'а', 'а'])));
|
||||
|
||||
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
|
||||
SELECT '';
|
||||
|
||||
DROP TABLE collate_test1;
|
||||
DROP TABLE collate_test2;
|
||||
DROP TABLE collate_test3;
|
||||
|
@ -155,6 +155,10 @@
|
||||
01509_dictionary_preallocate
|
||||
01526_max_untracked_memory
|
||||
01530_drop_database_atomic_sync
|
||||
01532_collate_in_low_cardinality
|
||||
01533_collate_in_nullable
|
||||
01542_collate_in_array
|
||||
01543_collate_in_tuple
|
||||
01546_log_queries_min_query_duration_ms
|
||||
01547_query_log_current_database
|
||||
01548_query_log_query_execution_ms
|
||||
|
Loading…
Reference in New Issue
Block a user