Merge pull request #16273 from Avogar/collation-support

Support collate in LowCardinality, Nullable, Array and Tuple, where nested type is String
This commit is contained in:
alexey-milovidov 2020-11-06 21:52:52 +03:00 committed by GitHub
commit 4e85d6a4c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 783 additions and 285 deletions

View File

@ -240,6 +240,10 @@ TESTS_TO_SKIP=(
01354_order_by_tuple_collate_const 01354_order_by_tuple_collate_const
01355_ilike 01355_ilike
01411_bayesian_ab_testing 01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_ _orc_
arrow arrow
avro avro

View File

@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n)
offsets_data.resize_assume_reserved(offsets_data.size() - n); offsets_data.resize_assume_reserved(offsets_data.size() - n);
} }
int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
{ {
const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_); const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_);
@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
size_t rhs_size = rhs.sizeAt(m); size_t rhs_size = rhs.sizeAt(m);
size_t min_size = std::min(lhs_size, rhs_size); size_t min_size = std::min(lhs_size, rhs_size);
for (size_t i = 0; i < min_size; ++i) for (size_t i = 0; i < min_size; ++i)
if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint)) {
int res;
if (collator)
res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator);
else
res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint);
if (res)
return res; return res;
}
return lhs_size < rhs_size return lhs_size < rhs_size
? -1 ? -1
@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
: 1); : 1);
} }
/// Compares row n of this column with row m of rhs_ without collation:
/// forwards to compareAtImpl with a null collator.
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
{
    return compareAtImpl(n, m, rhs_, nan_direction_hint, nullptr);
}
int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
}
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const int direction, int nan_direction_hint) const
@ -352,27 +368,26 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint); compare_results, direction, nan_direction_hint);
} }
namespace template <bool positive>
struct ColumnArray::Cmp
{ {
template <bool positive> const ColumnArray & parent;
struct Less int nan_direction_hint;
const Collator * collator;
Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr)
: parent(parent_), nan_direction_hint(nan_direction_hint_), collator(collator_) {}
int operator()(size_t lhs, size_t rhs) const
{ {
const ColumnArray & parent; int res;
int nan_direction_hint; if (collator)
res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator);
Less(const ColumnArray & parent_, int nan_direction_hint_) else
: parent(parent_), nan_direction_hint(nan_direction_hint_) {} res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
return positive ? res : -res;
bool operator()(size_t lhs, size_t rhs) const }
{ };
if (positive)
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0;
else
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0;
}
};
}
void ColumnArray::reserve(size_t n) void ColumnArray::reserve(size_t n)
{ {
@ -753,7 +768,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
INSTANTIATE_INDEX_IMPL(ColumnArray) INSTANTIATE_INDEX_IMPL(ColumnArray)
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const template <typename Comparator>
void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{ {
size_t s = size(); size_t s = size();
if (limit >= s) if (limit >= s)
@ -763,23 +779,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
for (size_t i = 0; i < s; ++i) for (size_t i = 0; i < s; ++i)
res[i] = i; res[i] = i;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit) if (limit)
{ std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(*this, nan_direction_hint));
}
else else
{ std::sort(res.begin(), res.end(), less);
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(*this, nan_direction_hint));
}
} }
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const template <typename Comparator>
void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const
{ {
if (equal_range.empty()) if (equal_range.empty())
return; return;
@ -792,20 +801,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
if (limit) if (limit)
--number_of_ranges; --number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
EqualRanges new_ranges; EqualRanges new_ranges;
for (size_t i = 0; i < number_of_ranges; ++i) for (size_t i = 0; i < number_of_ranges; ++i)
{ {
const auto & [first, last] = equal_range[i]; const auto & [first, last] = equal_range[i];
if (reverse) std::sort(res.begin() + first, res.begin() + last, less);
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin() + first, res.begin() + last, Less<true>(*this, nan_direction_hint));
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < last; ++j) for (auto j = first + 1; j < last; ++j)
{ {
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) if (cmp(res[new_first], res[j]) != 0)
{ {
if (j - new_first > 1) if (j - new_first > 1)
new_ranges.emplace_back(new_first, j); new_ranges.emplace_back(new_first, j);
@ -827,14 +835,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
if (reverse) std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<true>(*this, nan_direction_hint));
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < limit; ++j) for (auto j = first + 1; j < limit; ++j)
{ {
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) if (cmp(res[new_first], res[j]) != 0)
{ {
if (j - new_first > 1) if (j - new_first > 1)
new_ranges.emplace_back(new_first, j); new_ranges.emplace_back(new_first, j);
@ -845,7 +850,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
auto new_last = limit; auto new_last = limit;
for (auto j = limit; j < last; ++j) for (auto j = limit; j < last; ++j)
{ {
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0) if (cmp(res[new_first], res[j]) == 0)
{ {
std::swap(res[new_last], res[j]); std::swap(res[new_last], res[j]);
++new_last; ++new_last;
@ -859,6 +864,39 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
equal_range = std::move(new_ranges); equal_range = std::move(new_ranges);
} }
/// Fills res via getPermutationImpl using a comparator without collation.
/// Cmp<true> is used for the direct order, Cmp<false> when reverse is set.
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint));
        return;
    }

    getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint));
}
/// Refines res inside equal_range via updatePermutationImpl using a comparator without collation.
/// Cmp<true> is used for the direct order, Cmp<false> when reverse is set.
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
    if (!reverse)
    {
        updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint));
        return;
    }

    updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint));
}
/// Same as getPermutation, but the comparator is constructed with a pointer to the given collator.
void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, Cmp<true>(*this, nan_direction_hint, &collator));
        return;
    }

    getPermutationImpl(limit, res, Cmp<false>(*this, nan_direction_hint, &collator));
}
/// Same as updatePermutation, but the comparator is constructed with a pointer to the given collator.
void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
    if (!reverse)
    {
        updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
        return;
    }

    updatePermutationImpl(limit, res, equal_range, Cmp<false>(*this, nan_direction_hint, &collator));
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{ {
if (replicate_offsets.empty()) if (replicate_offsets.empty())

View File

@ -77,8 +77,11 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num, void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override; int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override; void reserve(size_t n) override;
size_t byteSize() const override; size_t byteSize() const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
@ -132,6 +135,8 @@ public:
return false; return false;
} }
/// Reports whatever the nested data column reports about collation support.
bool isCollationSupported() const override
{
    return getData().isCollationSupported();
}
private: private:
WrappedPtr data; WrappedPtr data;
WrappedPtr offsets; WrappedPtr offsets;
@ -169,6 +174,17 @@ private:
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
template <bool positive>
struct Cmp;
}; };

View File

@ -248,6 +248,8 @@ public:
/// The constant value. It is valid even if the size of the column is 0. /// The constant value. It is valid even if the size of the column is 0.
template <typename T> template <typename T>
T getValue() const { return getField().safeGet<NearestFieldType<T>>(); } T getValue() const { return getField().safeGet<NearestFieldType<T>>(); }
/// Reports whatever the wrapped data column reports about collation support.
bool isCollationSupported() const override
{
    return data->isCollationSupported();
}
}; };
} }

View File

@ -1,5 +1,6 @@
#include <Columns/ColumnLowCardinality.h> #include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <DataTypes/NumberTraits.h> #include <DataTypes/NumberTraits.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
@ -278,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size)); return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
} }
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{ {
const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs); const auto & low_cardinality_column = assert_cast<const ColumnLowCardinality &>(rhs);
size_t n_index = getIndexes().getUInt(n); size_t n_index = getIndexes().getUInt(n);
size_t m_index = low_cardinality_column.getIndexes().getUInt(m); size_t m_index = low_cardinality_column.getIndexes().getUInt(m);
if (collator)
return getDictionary().getNestedColumn()->compareAtWithCollation(n_index, m_index, *low_cardinality_column.getDictionary().getNestedColumn(), nan_direction_hint, *collator);
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
} }
/// Compares row n of this column with row m of rhs without collation:
/// forwards to compareAtImpl with a null collator.
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    return compareAtImpl(n, m, rhs, nan_direction_hint, nullptr);
}
int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num, void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const int direction, int nan_direction_hint) const
@ -295,14 +308,17 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num
compare_results, direction, nan_direction_hint); compare_results, direction, nan_direction_hint);
} }
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const
{ {
if (limit == 0) if (limit == 0)
limit = size(); limit = size();
size_t unique_limit = getDictionary().size(); size_t unique_limit = getDictionary().size();
Permutation unique_perm; Permutation unique_perm;
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); if (collator)
getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm);
else
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
/// TODO: optimize with sse. /// TODO: optimize with sse.
@ -330,7 +346,8 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di
} }
} }
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const template <typename Cmp>
void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const
{ {
if (equal_ranges.empty()) if (equal_ranges.empty())
return; return;
@ -345,20 +362,17 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
EqualRanges new_ranges; EqualRanges new_ranges;
SCOPE_EXIT({equal_ranges = std::move(new_ranges);}); SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i) for (size_t i = 0; i < number_of_ranges; ++i)
{ {
const auto& [first, last] = equal_ranges[i]; const auto& [first, last] = equal_ranges[i];
if (reverse) std::sort(res.begin() + first, res.begin() + last, less);
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
else
std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < last; ++j) for (auto j = first + 1; j < last; ++j)
{ {
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) if (comparator(res[new_first], res[j]) != 0)
{ {
if (j - new_first > 1) if (j - new_first > 1)
new_ranges.emplace_back(new_first, j); new_ranges.emplace_back(new_first, j);
@ -379,17 +393,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
if (reverse) std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; });
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b)
{return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; });
auto new_first = first; auto new_first = first;
for (auto j = first + 1; j < limit; ++j) for (auto j = first + 1; j < limit; ++j)
{ {
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0) if (comparator(res[new_first],res[j]) != 0)
{ {
if (j - new_first > 1) if (j - new_first > 1)
new_ranges.emplace_back(new_first, j); new_ranges.emplace_back(new_first, j);
@ -401,7 +410,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
auto new_last = limit; auto new_last = limit;
for (auto j = limit; j < last; ++j) for (auto j = limit; j < last; ++j)
{ {
if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0) if (comparator(res[new_first], res[j]) == 0)
{ {
std::swap(res[new_last], res[j]); std::swap(res[new_last], res[j]);
++new_last; ++new_last;
@ -412,6 +421,38 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan
} }
} }
/// Builds the permutation without collation: forwards to getPermutationImpl with a null collator.
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    getPermutationImpl(reverse, limit, nan_direction_hint, res, nullptr);
}
/// Refines res inside equal_ranges without collation.
/// Rows are compared through their dictionary indexes; the sign of the result is flipped when reverse is set.
void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
    auto compare_rows = [this, nan_direction_hint, reverse](size_t lhs, size_t rhs)
    {
        const size_t lhs_index = getIndexes().getUInt(lhs);
        const size_t rhs_index = getIndexes().getUInt(rhs);
        const int order = getDictionary().compareAt(lhs_index, rhs_index, getDictionary(), nan_direction_hint);

        if (reverse)
            return -order;
        return order;
    };

    updatePermutationImpl(limit, res, equal_ranges, compare_rows);
}
void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator);
}
/// Refines res inside equal_ranges with collation.
/// Rows are compared through their dictionary indexes on the dictionary's nested column;
/// the sign of the result is flipped when reverse is set.
void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
    auto compare_rows = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs)
    {
        const size_t lhs_index = getIndexes().getUInt(lhs);
        const size_t rhs_index = getIndexes().getUInt(rhs);
        const int order = getDictionary().getNestedColumn()->compareAtWithCollation(
            lhs_index, rhs_index, *getDictionary().getNestedColumn(), nan_direction_hint, collator);

        if (reverse)
            return -order;
        return order;
    };

    updatePermutationImpl(limit, res, equal_ranges, compare_rows);
}
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
{ {
auto columns = getIndexes().scatter(num_columns, selector); auto columns = getIndexes().scatter(num_columns, selector);

View File

@ -125,10 +125,16 @@ public:
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override; int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
ColumnPtr replicate(const Offsets & offsets) const override ColumnPtr replicate(const Offsets & offsets) const override
{ {
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets)); return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
@ -170,6 +176,7 @@ public:
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
bool isNumeric() const override { return getDictionary().isNumeric(); } bool isNumeric() const override { return getDictionary().isNumeric(); }
bool lowCardinality() const override { return true; } bool lowCardinality() const override { return true; }
/// Reports whatever the dictionary's nested column reports about collation support.
bool isCollationSupported() const override
{
    return getDictionary().getNestedColumn()->isCollationSupported();
}
/** /**
* Checks if the dictionary column is Nullable(T). * Checks if the dictionary column is Nullable(T).
@ -309,6 +316,13 @@ private:
void compactInplace(); void compactInplace();
void compactIfSharedDictionary(); void compactIfSharedDictionary();
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
template <typename Cmp>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
}; };

View File

@ -6,6 +6,7 @@
#include <Common/WeakHash.h> #include <Common/WeakHash.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
@ -223,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
return ColumnNullable::create(indexed_data, indexed_null_map); return ColumnNullable::create(indexed_data, indexed_null_map);
} }
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const
{ {
/// NULL values share the properties of NaN values. /// NULL values share the properties of NaN values.
/// Here the last parameter of compareAt is called null_direction_hint /// Here the last parameter of compareAt is called null_direction_hint
@ -245,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null
} }
const IColumn & nested_rhs = nullable_rhs.getNestedColumn(); const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
if (collator)
return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator);
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
} }
/// Compares row n of this column with row m of rhs_ without collation:
/// forwards to compareAtImpl with a null collator.
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
    return compareAtImpl(n, m, rhs_, null_direction_hint, nullptr);
}
int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
}
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const int direction, int nan_direction_hint) const
@ -256,10 +270,14 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint); compare_results, direction, nan_direction_hint);
} }
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
{ {
/// Cannot pass limit because of unknown amount of NULLs. /// Cannot pass limit because of unknown amount of NULLs.
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
if (collator)
getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res);
else
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
if ((null_direction_hint > 0) != reverse) if ((null_direction_hint > 0) != reverse)
{ {
@ -329,7 +347,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi
} }
} }
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{ {
if (equal_ranges.empty()) if (equal_ranges.empty())
return; return;
@ -432,12 +450,35 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire
} }
} }
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); if (collator)
getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges);
else
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);
equal_ranges = std::move(new_ranges); equal_ranges = std::move(new_ranges);
std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges)); std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges));
} }
/// Builds the permutation without collation: forwards to getPermutationImpl with a null collator.
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
    getPermutationImpl(reverse, limit, null_direction_hint, res, nullptr);
}
/// Refines res inside equal_ranges without collation: forwards to updatePermutationImpl with a null collator.
void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
    updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges, nullptr);
}
void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
{
getPermutationImpl(reverse, limit, null_direction_hint, res, &collator);
}
void ColumnNullable::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator);
}
void ColumnNullable::gather(ColumnGathererStream & gatherer) void ColumnNullable::gather(ColumnGathererStream & gatherer)
{ {
gatherer.gather(*this); gatherer.gather(*this);

View File

@ -6,6 +6,7 @@
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
class Collator;
namespace DB namespace DB
{ {
@ -92,8 +93,12 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num, void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override; int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override; void reserve(size_t n) override;
size_t byteSize() const override; size_t byteSize() const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
@ -129,6 +134,7 @@ public:
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); } size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); }
bool onlyNull() const override { return nested_column->isDummy(); } bool onlyNull() const override { return nested_column->isDummy(); }
bool isCollationSupported() const override { return nested_column->isCollationSupported(); }
/// Return the column that represents values. /// Return the column that represents values.
@ -164,6 +170,13 @@ private:
template <bool negative> template <bool negative>
void applyNullMapImpl(const ColumnUInt8 & map); void applyNullMapImpl(const ColumnUInt8 & map);
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
void updatePermutationImpl(
bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
}; };
ColumnPtr makeNullable(const ColumnPtr & column); ColumnPtr makeNullable(const ColumnPtr & column);

View File

@ -285,21 +285,22 @@ void ColumnString::compareColumn(
} }
template <bool positive> template <bool positive>
struct ColumnString::less struct ColumnString::Cmp
{ {
const ColumnString & parent; const ColumnString & parent;
explicit less(const ColumnString & parent_) : parent(parent_) {} explicit Cmp(const ColumnString & parent_) : parent(parent_) {}
bool operator()(size_t lhs, size_t rhs) const int operator()(size_t lhs, size_t rhs) const
{ {
int res = memcmpSmallAllowOverflow15( int res = memcmpSmallAllowOverflow15(
parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1, parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1); parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
return positive ? (res < 0) : (res > 0); return positive ? res : -res;
} }
}; };
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const template <typename Comparator>
void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{ {
size_t s = offsets.size(); size_t s = offsets.size();
res.resize(s); res.resize(s);
@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio
if (limit >= s) if (limit >= s)
limit = 0; limit = 0;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit) if (limit)
{ std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
}
else else
{ std::sort(res.begin(), res.end(), less);
if (reverse)
std::sort(res.begin(), res.end(), less<false>(*this));
else
std::sort(res.begin(), res.end(), less<true>(*this));
}
} }
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const template <typename Comparator>
void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const
{ {
if (equal_ranges.empty()) if (equal_ranges.empty())
return; return;
@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
if (limit) if (limit)
--number_of_ranges; --number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i) for (size_t i = 0; i < number_of_ranges; ++i)
{ {
const auto & [first, last] = equal_ranges[i]; const auto & [first, last] = equal_ranges[i];
std::sort(res.begin() + first, res.begin() + last, less);
if (reverse)
std::sort(res.begin() + first, res.begin() + last, less<false>(*this));
else
std::sort(res.begin() + first, res.begin() + last, less<true>(*this));
size_t new_first = first; size_t new_first = first;
for (size_t j = first + 1; j < last; ++j) for (size_t j = first + 1; j < last; ++j)
{ {
if (memcmpSmallAllowOverflow15( if (cmp(res[j], res[new_first]) != 0)
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
{ {
if (j - new_first > 1) if (j - new_first > 1)
new_ranges.emplace_back(new_first, j); new_ranges.emplace_back(new_first, j);
@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
/// Since then we are working inside the interval. /// Since then we are working inside the interval.
if (reverse) std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<false>(*this));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less<true>(*this));
size_t new_first = first; size_t new_first = first;
for (size_t j = first + 1; j < limit; ++j) for (size_t j = first + 1; j < limit; ++j)
{ {
if (memcmpSmallAllowOverflow15( if (cmp(res[j], res[new_first]) != 0)
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0)
{ {
if (j - new_first > 1) if (j - new_first > 1)
new_ranges.emplace_back(new_first, j); new_ranges.emplace_back(new_first, j);
@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
size_t new_last = limit; size_t new_last = limit;
for (size_t j = limit; j < last; ++j) for (size_t j = limit; j < last; ++j)
{ {
if (memcmpSmallAllowOverflow15( if (cmp(res[j], res[new_first]) == 0)
chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1,
chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0)
{ {
std::swap(res[j], res[new_last]); std::swap(res[j], res[new_last]);
++new_last; ++new_last;
@ -408,6 +391,56 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc
} }
} }
/// Builds the sort permutation using the plain (non-collated) Cmp comparator.
/// The direction is baked into the comparator type: Cmp<true> for ascending, Cmp<false> for reverse.
/// The nan direction hint is not used.
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, Cmp<true>(*this));
        return;
    }

    getPermutationImpl(limit, res, Cmp<false>(*this));
}
/// Refines an existing permutation inside equal_ranges using the plain (non-collated) Cmp comparator.
/// Cmp<true> is used for ascending order and Cmp<false> for reverse; the nan direction hint is not used.
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
{
    if (!reverse)
    {
        updatePermutationImpl(limit, res, equal_ranges, Cmp<true>(*this));
        return;
    }

    updatePermutationImpl(limit, res, equal_ranges, Cmp<false>(*this));
}
/// Three-way comparator over row indices of a single ColumnString, using a Collator.
/// With positive == false the sign of the result is inverted, which yields the reverse order.
template <bool positive>
struct ColumnString::CmpWithCollation
{
    const ColumnString & parent;
    const Collator & collator;

    CmpWithCollation(const ColumnString & parent_, const Collator & collator_)
        : parent(parent_)
        , collator(collator_)
    {
    }

    /// Returns the collator's comparison result for rows lhs and rhs (negated when !positive).
    int operator()(size_t lhs, size_t rhs) const
    {
        const char * lhs_begin = reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]);
        const char * rhs_begin = reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]);

        const int order = collator.compare(lhs_begin, parent.sizeAt(lhs), rhs_begin, parent.sizeAt(rhs));

        if constexpr (positive)
            return order;
        else
            return -order;
    }
};
/// Builds the sort permutation using the collation-aware comparator.
/// CmpWithCollation<true> is used for ascending order and CmpWithCollation<false> for reverse.
/// The unnamed int parameter (direction hint) is not used.
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const
{
    if (!reverse)
        return getPermutationImpl(limit, res, CmpWithCollation<true>(*this, collator));

    getPermutationImpl(limit, res, CmpWithCollation<false>(*this, collator));
}
/// Refines an existing permutation inside equal_ranges using the collation-aware comparator.
/// CmpWithCollation<true> is used for ascending order and CmpWithCollation<false> for reverse.
/// The unnamed int parameter (direction hint) is not used.
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
    if (!reverse)
        return updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<true>(*this, collator));

    updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation<false>(*this, collator));
}
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
{ {
size_t col_size = size(); size_t col_size = size();
@ -476,13 +509,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const
size_t min_idx = 0; size_t min_idx = 0;
size_t max_idx = 0; size_t max_idx = 0;
less<true> less_op(*this); Cmp<true> cmp_op(*this);
for (size_t i = 1; i < col_size; ++i) for (size_t i = 1; i < col_size; ++i)
{ {
if (less_op(i, min_idx)) if (cmp_op(i, min_idx) < 0)
min_idx = i; min_idx = i;
else if (less_op(max_idx, i)) else if (cmp_op(max_idx, i) < 0)
max_idx = i; max_idx = i;
} }
@ -491,7 +524,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const
} }
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
{ {
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_); const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
@ -500,134 +533,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs
reinterpret_cast<const char *>(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m)); reinterpret_cast<const char *>(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m));
} }
/// Strict-ordering predicate over row indices of a single ColumnString, using a Collator.
/// positive == true orders ascending ("lhs before rhs"), positive == false orders descending.
template <bool positive>
struct ColumnString::lessWithCollation
{
    const ColumnString & parent;
    const Collator & collator;

    lessWithCollation(const ColumnString & parent_, const Collator & collator_)
        : parent(parent_)
        , collator(collator_)
    {
    }

    bool operator()(size_t lhs, size_t rhs) const
    {
        const char * lhs_begin = reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]);
        const char * rhs_begin = reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]);

        const int order = collator.compare(lhs_begin, parent.sizeAt(lhs), rhs_begin, parent.sizeAt(rhs));

        if constexpr (positive)
            return order < 0;
        else
            return order > 0;
    }
};
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
{
size_t s = offsets.size();
res.resize(s);
for (size_t i = 0; i < s; ++i)
res[i] = i;
if (limit >= s)
limit = 0;
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<false>(*this, collator));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<true>(*this, collator));
}
else
{
if (reverse)
std::sort(res.begin(), res.end(), lessWithCollation<false>(*this, collator));
else
std::sort(res.begin(), res.end(), lessWithCollation<true>(*this, collator));
}
}
/// Re-sorts, with the collator, each [first, last) slice of res listed in equal_ranges,
/// then replaces equal_ranges with the sub-ranges (length > 1) whose rows still compare equal.
/// A non-zero limit makes the last range be handled with a partial sort up to position `limit`.
/// The unnamed int parameter is not used.
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
if (equal_ranges.empty())
return;
/// A limit at or beyond the column size or the end of the last range is treated as "no limit".
if (limit >= size() || limit >= equal_ranges.back().second)
limit = 0;
size_t number_of_ranges = equal_ranges.size();
/// With a limit, the last range is processed separately below.
if (limit)
--number_of_ranges;
EqualRanges new_ranges;
/// equal_ranges is overwritten with new_ranges on every exit path, including the early return below.
SCOPE_EXIT({equal_ranges = std::move(new_ranges);});
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto& [first, last] = equal_ranges[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<false>(*this, collator));
else
std::sort(res.begin() + first, res.begin() + last, lessWithCollation<true>(*this, collator));
/// Walk the sorted slice and record every run of more than one equal row.
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
new_first = j;
}
}
if (last - new_first > 1)
new_ranges.emplace_back(new_first, last);
}
if (limit)
{
const auto & [first, last] = equal_ranges.back();
/// Nothing to do for the last range when the limit position lies outside of it.
if (limit < first || limit > last)
return;
/// From here on we are working inside the last range.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<false>(*this, collator));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation<true>(*this, collator));
/// Record runs of equal rows within the sorted part [first, limit).
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
new_first = j;
}
}
/// Rows in [limit, last) that equal the row at new_first are swapped forward,
/// so they sit directly after position limit and extend the final run.
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (collator.compare(
reinterpret_cast<const char *>(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]),
reinterpret_cast<const char *>(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
}
}
if (new_last - new_first > 1)
new_ranges.emplace_back(new_first, new_last);
}
}
void ColumnString::protect() void ColumnString::protect()
{ {
getChars().protect(); getChars().protect();

View File

@ -43,14 +43,20 @@ private:
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
template <bool positive> template <bool positive>
struct less; struct Cmp;
template <bool positive> template <bool positive>
struct lessWithCollation; struct CmpWithCollation;
ColumnString() = default; ColumnString() = default;
ColumnString(const ColumnString & src); ColumnString(const ColumnString & src);
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const;
public: public:
const char * getFamilyName() const override { return "String"; } const char * getFamilyName() const override { return "String"; }
TypeIndex getDataType() const override { return TypeIndex::String; } TypeIndex getDataType() const override { return TypeIndex::String; }
@ -229,16 +235,16 @@ public:
int direction, int nan_direction_hint) const override; int direction, int nan_direction_hint) const override;
/// Variant of compareAt for string comparison with respect of collation. /// Variant of compareAt for string comparison with respect of collation.
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
/// Sorting with respect of collation. /// Sorting with respect of collation.
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override;
@ -270,6 +276,8 @@ public:
// Throws an exception if offsets/chars are messed up // Throws an exception if offsets/chars are messed up
void validate() const; void validate() const;
bool isCollationSupported() const override { return true; }
}; };

View File

@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se
return res; return res;
} }
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{ {
const size_t tuple_size = columns.size(); const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i) for (size_t i = 0; i < tuple_size; ++i)
if (int res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint)) {
int res;
if (collator && columns[i]->isCollationSupported())
res = columns[i]->compareAtWithCollation(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint, *collator);
else
res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint);
if (res)
return res; return res;
}
return 0; return 0;
} }
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const int direction, int nan_direction_hint) const
@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint); compare_results, direction, nan_direction_hint);
} }
/// Collation-aware variant of compareAt: compares row n of this tuple with row m of rhs.
/// Delegates to compareAtImpl, passing the address of the supplied collator.
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
    const Collator * collator_ptr = &collator;
    const int result = compareAtImpl(n, m, rhs, nan_direction_hint, collator_ptr);
    return result;
}
template <bool positive> template <bool positive>
struct ColumnTuple::Less struct ColumnTuple::Less
{ {
TupleColumns columns; TupleColumns columns;
int nan_direction_hint; int nan_direction_hint;
const Collator * collator;
Less(const TupleColumns & columns_, int nan_direction_hint_) Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr)
: columns(columns_), nan_direction_hint(nan_direction_hint_) : columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_)
{ {
} }
@ -308,7 +325,11 @@ struct ColumnTuple::Less
{ {
for (const auto & column : columns) for (const auto & column : columns)
{ {
int res = column->compareAt(a, b, *column, nan_direction_hint); int res;
if (collator && column->isCollationSupported())
res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator);
else
res = column->compareAt(a, b, *column, nan_direction_hint);
if (res < 0) if (res < 0)
return positive; return positive;
else if (res > 0) else if (res > 0)
@ -318,7 +339,8 @@ struct ColumnTuple::Less
} }
}; };
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const template <typename LessOperator>
void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const
{ {
size_t rows = size(); size_t rows = size();
res.resize(rows); res.resize(rows);
@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
if (limit) if (limit)
{ {
if (reverse) std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(columns, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(columns, nan_direction_hint));
} }
else else
{ {
if (reverse) std::sort(res.begin(), res.end(), less);
std::sort(res.begin(), res.end(), Less<false>(columns, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(columns, nan_direction_hint));
} }
} }
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{ {
if (equal_ranges.empty()) if (equal_ranges.empty())
return; return;
for (const auto & column : columns) for (const auto & column : columns)
{ {
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); if (collator && column->isCollationSupported())
column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges);
else
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first) while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
equal_ranges.pop_back(); equal_ranges.pop_back();
@ -361,6 +380,32 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio
} }
} }
/// Builds the sort permutation without collation.
/// The direction is selected through the Less template argument: Less<true> ascending, Less<false> reverse.
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    if (!reverse)
    {
        getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint));
        return;
    }

    getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint));
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges);
}
void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (reverse)
getPermutationImpl(limit, res, Less<false>(columns, nan_direction_hint, &collator));
else
getPermutationImpl(limit, res, Less<true>(columns, nan_direction_hint, &collator));
}
void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator);
}
void ColumnTuple::gather(ColumnGathererStream & gatherer) void ColumnTuple::gather(ColumnGathererStream & gatherer)
{ {
gatherer.gather(*this); gatherer.gather(*this);
@ -433,5 +478,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
return false; return false;
} }
bool ColumnTuple::isCollationSupported() const
{
for (const auto& column : columns)
{
if (column->isCollationSupported())
return true;
}
return false;
}
} }

View File

@ -75,15 +75,19 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num, void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results, PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override; int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
void reserve(size_t n) override; void reserve(size_t n) override;
size_t byteSize() const override; size_t byteSize() const override;
size_t allocatedBytes() const override; size_t allocatedBytes() const override;
void protect() override; void protect() override;
void forEachSubcolumn(ColumnCallback callback) override; void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override; bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
size_t tupleSize() const { return columns.size(); } size_t tupleSize() const { return columns.size(); }
@ -94,6 +98,15 @@ public:
Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; } Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }
const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
private:
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename LessOperator>
void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const;
void updatePermutationImpl(
bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const;
}; };

View File

@ -9,7 +9,7 @@
class SipHash; class SipHash;
class Collator;
namespace DB namespace DB
{ {
@ -18,6 +18,7 @@ namespace ErrorCodes
{ {
extern const int CANNOT_GET_SIZE_OF_FIELD; extern const int CANNOT_GET_SIZE_OF_FIELD;
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
extern const int BAD_COLLATION;
} }
class Arena; class Arena;
@ -250,6 +251,12 @@ public:
*/ */
virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
/// Equivalent to compareAt, but collator is used to compare values.
/// Parameters, in order: row n in this column, row m in rhs, the rhs column,
/// the nan direction hint and the collator.
/// This default implementation always throws BAD_COLLATION; column types that
/// support collation override it.
virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const
{
    /// Message text is kept identical to the other collation stubs in this class.
    throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/// Compare the whole column with single value from rhs column. /// Compare the whole column with single value from rhs column.
/// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare. /// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare.
/// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction /// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction
@ -277,6 +284,18 @@ public:
*/ */
virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0; virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0;
/** Equivalent to getPermutation and updatePermutation but collator is used to compare values.
* Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them.
* The default implementation below always throws BAD_COLLATION; columns that support
* collation override it.
*/
virtual void getPermutationWithCollation(const Collator & /*collator*/, bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/, Permutation & /*res*/) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/// Collation-aware counterpart of updatePermutation.
/// The default implementation always throws BAD_COLLATION; columns that support collation override it.
virtual void updatePermutationWithCollation(const Collator & /*collator*/, bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/, Permutation & /*res*/, EqualRanges& /*equal_ranges*/) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/** Copies each element according offsets parameter. /** Copies each element according offsets parameter.
* (i-th element should be copied offsets[i] - offsets[i - 1] times.) * (i-th element should be copied offsets[i] - offsets[i - 1] times.)
* It is necessary in ARRAY JOIN operation. * It is necessary in ARRAY JOIN operation.
@ -402,6 +421,8 @@ public:
virtual bool lowCardinality() const { return false; } virtual bool lowCardinality() const { return false; }
virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default; virtual ~IColumn() = default;
IColumn() = default; IColumn() = default;
IColumn(const IColumn &) = default; IColumn(const IColumn &) = default;

View File

@ -96,7 +96,7 @@ struct SortCursorImpl
: column_desc.column_number; : column_desc.column_number;
sort_columns.push_back(columns[column_number].get()); sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String) need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String)
has_collation |= need_collation[j]; has_collation |= need_collation[j];
} }
@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
int nulls_direction = desc.nulls_direction; int nulls_direction = desc.nulls_direction;
int res; int res;
if (impl->need_collation[i]) if (impl->need_collation[i])
{ res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator);
const ColumnString & column_string = assert_cast<const ColumnString &>(*impl->sort_columns[i]);
res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator);
}
else else
res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction); res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction);

View File

@ -2,6 +2,8 @@
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
@ -86,8 +88,7 @@ struct PartialSortingLessWithCollation
} }
else if (isCollationRequired(elem.description)) else if (isCollationRequired(elem.description))
{ {
const ColumnString & column_string = assert_cast<const ColumnString &>(*elem.column); res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator);
res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator);
} }
else else
res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction);
@ -101,7 +102,6 @@ struct PartialSortingLessWithCollation
} }
}; };
void sortBlock(Block & block, const SortDescription & description, UInt64 limit) void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
{ {
if (!block) if (!block)
@ -120,14 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
bool is_column_const = false; bool is_column_const = false;
if (isCollationRequired(description[0])) if (isCollationRequired(description[0]))
{ {
/// it it's real string column, than we need sort if (!column->isCollationSupported())
if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column)) throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm);
else if (checkAndGetColumnConstData<ColumnString>(column)) if (isColumnConst(*column))
is_column_const = true; is_column_const = true;
else else
throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
} }
else if (!isColumnConst(*column)) else if (!isColumnConst(*column))
{ {
@ -163,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
const IColumn * column = columns_with_sort_desc[i].column; const IColumn * column = columns_with_sort_desc[i].column;
if (isCollationRequired(description[i])) if (isCollationRequired(description[i]))
{ {
if (!checkAndGetColumn<ColumnString>(column) && !checkAndGetColumnConstData<ColumnString>(column)) if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
need_collation = true; need_collation = true;
} }
@ -187,10 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
if (isCollationRequired(column.description)) if (isCollationRequired(column.description))
{ {
const ColumnString & column_string = assert_cast<const ColumnString &>(*column.column); column.column->updatePermutationWithCollation(
column_string.updatePermutationWithCollation( *column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
*column.description.collator,
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
} }
else else
{ {

View File

@ -0,0 +1,64 @@
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
Order by tuple without collate
1 Ё
1 а
1 ё
2 А
2 Я
2 я
Order by tuple with collate
1 а
1 ё
1 Ё
2 А
2 я
2 Я
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
1 \N
2 \N
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
1 \N
2 \N
Order by tuple without collate
1 Ё
1 а
1 ё
1 \N
2 А
2 Я
2 я
2 \N
Order by tuple with collate
1 а
1 ё
1 Ё
1 \N
2 А
2 я
2 Я
2 \N

View File

@ -0,0 +1,33 @@
-- Exercises ORDER BY ... COLLATE on LowCardinality(String) and
-- LowCardinality(Nullable(String)) sort keys, alone and after a leading numeric key.
-- Drop leftovers first so the script can be re-run safely.
DROP TABLE IF EXISTS test_collate;
DROP TABLE IF EXISTS test_collate_null;
-- Two tables with the same layout; the second wraps the string in Nullable so NULL keys are covered.
CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory();
CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory();
-- Upper- and lower-case Cyrillic letters, split across two values of x.
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я');
-- Same rows plus one NULL string for each value of x.
INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null);
-- Each SELECT of a string literal prints a header line that labels the result set following it.
SELECT 'Order by without collate';
SELECT * FROM test_collate ORDER BY s;
SELECT 'Order by with collate';
-- 'ru' presumably selects the Russian locale for the collator -- confirm against the COLLATE documentation.
SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
-- "tuple" in the labels below means a two-key sort (x, then s); no Tuple column is involved.
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
-- The same four queries repeated against the table that contains NULL strings.
SELECT 'Order by without collate';
SELECT * FROM test_collate_null ORDER BY s;
SELECT 'Order by with collate';
SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru';
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate_null ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru';
-- Clean up the tables created above.
DROP TABLE test_collate;
DROP TABLE test_collate_null;

View File

@ -0,0 +1,36 @@
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
1 \N
2 \N
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
1 \N
2 \N
Order by tuple without collate
1 Ё
1 а
1 ё
1 \N
2 А
2 Я
2 я
2 \N
Order by tuple with collate
1 а
1 ё
1 Ё
1 \N
2 А
2 я
2 Я
2 \N

View File

@ -0,0 +1,18 @@
-- Exercises ORDER BY ... COLLATE on a Nullable(String) sort key, alone and after a leading numeric key.
-- Drop leftovers first so the script can be re-run safely.
DROP TABLE IF EXISTS test_collate;
CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory();
-- Upper- and lower-case Cyrillic letters plus one NULL string for each value of x.
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null);
-- Each SELECT of a string literal prints a header line that labels the result set following it.
SELECT 'Order by without collate';
SELECT * FROM test_collate ORDER BY s;
SELECT 'Order by with collate';
-- 'ru' presumably selects the Russian locale for the collator -- confirm against the COLLATE documentation.
SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
-- "tuple" in the labels below means a two-key sort (x, then s); no Tuple column is involved.
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
-- Clean up the table created above.
DROP TABLE test_collate;

View File

@ -0,0 +1,50 @@
1 ['а']
2 ['А']
1 ['ё']
1 ['ё','а']
2 ['ё','а','а']
1 ['ё','я']
1 ['Ё']
2 ['я','а']
2 ['Я']
1 ['а']
1 ['ё']
1 ['ё','а']
1 ['ё','я']
1 ['Ё']
2 ['А']
2 ['ё','а','а']
2 ['я','а']
2 ['Я']
1 ['а']
2 ['А']
1 ['ё']
1 ['ё','а']
2 ['ё','а','а',NULL]
1 ['ё',NULL,'я']
1 ['Ё']
2 ['я']
2 [NULL,'Я']
1 ['а']
1 ['ё']
1 ['ё','а']
1 ['ё',NULL,'я']
1 ['Ё']
2 ['А']
2 ['ё','а','а',NULL]
2 ['я']
2 [NULL,'Я']
2 [['а','а'],['я','ё']]
1 [['а','Ё'],['ё','я']]
1 [['а','я'],['а','ё']]
2 [['ё']]
1 [['а','Ё'],['ё','я']]
1 [['а','я'],['а','ё']]
2 [['а','а'],['я','ё']]
2 [['ё']]

View File

@ -0,0 +1,34 @@
-- Exercises ORDER BY ... COLLATE on Array sort keys whose innermost element type is a string type.
-- Drop leftovers first so the script can be re-run safely.
DROP TABLE IF EXISTS collate_test1;
DROP TABLE IF EXISTS collate_test2;
DROP TABLE IF EXISTS collate_test3;
-- Three element shapes: plain strings, LowCardinality(Nullable(String)) elements, and nested arrays.
CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory();
CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory();
CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory();
-- Arrays of different lengths, including ones that share a prefix (e.g. ['ё'] and ['ё','а']).
INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 'я']), (2, ['ё', 'а', 'а']);
-- Similar data with NULL elements at the start, middle and end of some arrays.
INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]);
-- Arrays of arrays, so the collated comparison has to reach the strings two levels down.
INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]);
-- Every table is queried twice: sorted by the array alone, then by x followed by the array.
-- SELECT '' prints an empty line that separates consecutive result sets in the output.
-- 'ru' presumably selects the Russian locale for the collator -- confirm against the COLLATE documentation.
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
SELECT '';
-- Clean up the tables created above.
DROP TABLE collate_test1;
DROP TABLE collate_test2;
DROP TABLE collate_test3;

View File

@ -0,0 +1,60 @@
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
2 (1,'я')
1 (2,'а')
2 (2,'А')
2 (2,'Я')
1 (3,'я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
1 (2,'а')
1 (3,'я')
2 (1,'я')
2 (2,'А')
2 (2,'Я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
2 (1,'я')
1 (1,NULL)
2 (2,'А')
2 (2,'Я')
1 (2,NULL)
2 (2,NULL)
1 (3,'я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
1 (1,NULL)
1 (2,NULL)
1 (3,'я')
2 (1,'я')
2 (2,'А')
2 (2,'Я')
2 (2,NULL)
2 (1,(1,['А']))
2 (1,(1,['ё','а','а']))
1 (1,(1,['Ё']))
2 (1,(1,['Я']))
1 (1,(2,['а']))
1 (1,(2,['ё','я']))
1 (2,(1,['ё']))
1 (2,(1,['ё','а']))
2 (2,(1,['я']))
1 (1,(1,['Ё']))
1 (1,(2,['а']))
1 (1,(2,['ё','я']))
1 (2,(1,['ё']))
1 (2,(1,['ё','а']))
2 (1,(1,['А']))
2 (1,(1,['ё','а','а']))
2 (1,(1,['Я']))
2 (2,(1,['я']))

View File

@ -0,0 +1,34 @@
-- Exercises ORDER BY ... COLLATE on Tuple sort keys that contain a string type somewhere inside.
-- Drop leftovers first so the script can be re-run safely.
DROP TABLE IF EXISTS collate_test1;
DROP TABLE IF EXISTS collate_test2;
DROP TABLE IF EXISTS collate_test3;
-- Three tuple shapes: (number, String), (number, LowCardinality(Nullable(String))),
-- and a nested tuple whose innermost member is an Array(String).
CREATE TABLE collate_test1 (x UInt32, s Tuple(UInt32, String)) ENGINE=Memory();
CREATE TABLE collate_test2 (x UInt32, s Tuple(UInt32, LowCardinality(Nullable(String)))) ENGINE=Memory();
CREATE TABLE collate_test3 (x UInt32, s Tuple(UInt32, Tuple(UInt32, Array(String)))) ENGINE=Memory();
-- Several rows share the numeric tuple member, so the string member has to break the tie.
INSERT INTO collate_test1 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2,'а')), (1, (3, 'я'));
-- Similar data with NULL strings added for some numeric values.
INSERT INTO collate_test2 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2, null)), (1, (3, 'я')), (1, (1, null)), (2, (2, null));
-- Nested tuples ending in string arrays of different lengths.
INSERT INTO collate_test3 VALUES (1, (1, (1, ['Ё']))), (1, (2, (1, ['ё']))), (1, (1, (2, ['а']))), (2, (1, (1, ['А']))), (2, (2, (1, ['я']))), (2, (1, (1, ['Я']))), (1, (2, (1, ['ё','а']))), (1, (1, (2, ['ё', 'я']))), (2, (1, (1, ['ё', 'а', 'а'])));
-- Every table is queried twice: sorted by the tuple alone, then by x followed by the tuple.
-- SELECT '' prints an empty line that separates consecutive result sets in the output.
-- 'ru' presumably selects the Russian locale for the collator -- confirm against the COLLATE documentation.
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
SELECT '';
-- Clean up the tables created above.
DROP TABLE collate_test1;
DROP TABLE collate_test2;
DROP TABLE collate_test3;

View File

@ -155,6 +155,10 @@
01509_dictionary_preallocate 01509_dictionary_preallocate
01526_max_untracked_memory 01526_max_untracked_memory
01530_drop_database_atomic_sync 01530_drop_database_atomic_sync
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
01546_log_queries_min_query_duration_ms 01546_log_queries_min_query_duration_ms
01547_query_log_current_database 01547_query_log_current_database
01548_query_log_query_execution_ms 01548_query_log_query_execution_ms