From 2892252b3695080e73e9326ff505e170c21b2386 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 22 Oct 2020 23:23:44 +0300 Subject: [PATCH 01/11] Support collate in LowCardinality(String) and Nullable(String) and refactor ColumnString --- src/Columns/ColumnLowCardinality.cpp | 78 ++++-- src/Columns/ColumnLowCardinality.h | 9 + src/Columns/ColumnNullable.cpp | 51 +++- src/Columns/ColumnNullable.h | 9 + src/Columns/ColumnString.cpp | 230 +++++------------- src/Columns/ColumnString.h | 16 +- src/Interpreters/sortBlock.cpp | 57 ++++- ...01532_collate_in_low_cardinality.reference | 28 +++ .../01532_collate_in_low_cardinality.sql | 18 ++ .../01533_collate_in_nullable.reference | 36 +++ .../0_stateless/01533_collate_in_nullable.sql | 18 ++ 11 files changed, 342 insertions(+), 208 deletions(-) create mode 100644 tests/queries/0_stateless/01532_collate_in_low_cardinality.reference create mode 100644 tests/queries/0_stateless/01532_collate_in_low_cardinality.sql create mode 100644 tests/queries/0_stateless/01533_collate_in_nullable.reference create mode 100644 tests/queries/0_stateless/01533_collate_in_nullable.sql diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 64b503ed325..2e941a3ef8a 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -14,6 +15,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; + extern const int BAD_COLLATION; } namespace @@ -295,14 +297,24 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num compare_results, direction, nan_direction_hint); } -void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const { if (limit == 0) limit = size(); size_t unique_limit = getDictionary().size(); Permutation unique_perm; - getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); + if (collator) + { + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + column_string->getPermutationWithCollation(*collator, reverse, unique_limit, unique_perm); + } + else + getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); /// TODO: optimize with sse. @@ -330,7 +342,8 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di } } -void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +template +void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const { if (equal_ranges.empty()) return; @@ -345,20 +358,17 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan EqualRanges new_ranges; SCOPE_EXIT({equal_ranges = std::move(new_ranges);}); + auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + for (size_t i = 0; i < number_of_ranges; ++i) { const auto& [first, last] = equal_ranges[i]; - if (reverse) - std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); - else - std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + std::sort(res.begin() + first, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < last; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + if (comparator(res[new_first], res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -379,17 +389,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan /// Since then we are working inside the interval. - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) { - if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0) + if (comparator(res[new_first],res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -401,7 +406,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan auto new_last = limit; for (auto j = limit; j < last; ++j) { - if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0) + if (comparator(res[new_first], res[j]) == 0) { std::swap(res[new_last], res[j]); ++new_last; @@ -412,6 +417,43 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan } } +void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, nan_direction_hint, res); +} + +void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + auto comparator = [this, nan_direction_hint, reverse](size_t lhs, size_t rhs) + { + int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), getDictionary(), nan_direction_hint); + return reverse ? -ret : ret; + }; + + updatePermutationImpl(limit, res, equal_ranges, comparator); +} + +void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator); +} + +void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +{ + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + auto comparator = [this, &column_string, &collator, reverse](size_t lhs, size_t rhs) + { + int ret = column_string->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *column_string, collator); + return reverse ? -ret : ret; + }; + + updatePermutationImpl(limit, res, equal_ranges, comparator); +} + std::vector ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const { auto columns = getIndexes().scatter(num_columns, selector); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 0aeda4567fd..e45449873fc 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -31,6 +31,11 @@ class ColumnLowCardinality final : public COWHelper + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; + public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. @@ -129,6 +134,10 @@ public: void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const; + + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + ColumnPtr replicate(const Offsets & offsets) const override { return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets)); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 51248a598af..63b86f38342 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -17,6 +18,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; + extern const int BAD_COLLATION; } @@ -256,10 +258,21 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const { /// Cannot pass limit because of unknown amount of NULLs. - getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); + + if (collator) + { + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(&getNestedColumn()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + column_string->getPermutationWithCollation(*collator, reverse, 0, res); + } + else + getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); if ((null_direction_hint > 0) != reverse) { @@ -329,7 +342,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi } } -void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const { if (equal_ranges.empty()) return; @@ -432,12 +445,42 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire } } - getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); + if (collator) + { + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(getNestedColumn()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + column_string->updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); + } + else + getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); equal_ranges = std::move(new_ranges); std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges)); } +void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, null_direction_hint, res); +} + +void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges); +} + +void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, null_direction_hint, res, &collator); +} + +void ColumnNullable::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator); +} + void ColumnNullable::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index e4033e22737..3d7a7970bd3 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -6,6 +6,7 @@ #include #include +class Collator; namespace DB { @@ -30,6 +31,11 @@ private: ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_); ColumnNullable(const ColumnNullable &) = default; + void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; + public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. @@ -94,6 +100,9 @@ public: int direction, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const; + void updatePermutationWithCollation( + const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index cd06ea20f83..27dd9e54685 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -284,22 +284,23 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } -template -struct ColumnString::less +struct ColumnString::cmp { const ColumnString & parent; - explicit less(const ColumnString & parent_) : parent(parent_) {} - bool operator()(size_t lhs, size_t rhs) const + bool reverse; + cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1, parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1); - return positive ? (res < 0) : (res > 0); + return reverse ? -res : res; } }; -void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const +template +void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const { size_t s = offsets.size(); res.resize(s); @@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio if (limit >= s) limit = 0; + auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); - } + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - { - if (reverse) - std::sort(res.begin(), res.end(), less(*this)); - else - std::sort(res.begin(), res.end(), less(*this)); - } + std::sort(res.begin(), res.end(), less); } -void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const +template +void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const { if (equal_ranges.empty()) return; @@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc if (limit) --number_of_ranges; + auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + for (size_t i = 0; i < number_of_ranges; ++i) { const auto & [first, last] = equal_ranges[i]; - - if (reverse) - std::sort(res.begin() + first, res.begin() + last, less(*this)); - else - std::sort(res.begin() + first, res.begin() + last, less(*this)); + std::sort(res.begin() + first, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < last; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + if (comparator(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc /// Since then we are working inside the interval. - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + if (comparator(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc size_t new_last = limit; for (size_t j = limit; j < last; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0) + if (comparator(res[j], res[new_first]) == 0) { std::swap(res[j], res[new_last]); ++new_last; @@ -408,6 +391,45 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc } } +void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const +{ + getPermutationImpl(limit, res, cmp(*this, reverse)); +} + +void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(limit, res, equal_ranges, cmp(*this, reverse)); +} + +struct ColumnString::cmpWithCollation +{ + const ColumnString & parent; + const Collator & collator; + bool reverse; + + cmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} + + int operator()(size_t lhs, size_t rhs) const + { + int res = collator.compare( + reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), + reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); + + return reverse ? -res : res; + } +}; + +void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const +{ + getPermutationImpl(limit, res, cmpWithCollation(*this, collator, reverse)); +} + +void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(limit, res, equal_ranges, cmpWithCollation(*this, collator, reverse)); +} + + ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const { size_t col_size = size(); @@ -476,13 +498,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - less less_op(*this); + cmp cmp_op(*this); for (size_t i = 1; i < col_size; ++i) { - if (less_op(i, min_idx)) + if (cmp_op(i, min_idx) < 0) min_idx = i; - else if (less_op(max_idx, i)) + else if (cmp_op(max_idx, i) < 0) max_idx = i; } @@ -500,134 +522,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs reinterpret_cast(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m)); } - -template -struct ColumnString::lessWithCollation -{ - const ColumnString & parent; - const Collator & collator; - - lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {} - - bool operator()(size_t lhs, size_t rhs) const - { - int res = collator.compare( - reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), - reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); - - return positive ? (res < 0) : (res > 0); - } -}; - -void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const -{ - size_t s = offsets.size(); - res.resize(s); - for (size_t i = 0; i < s; ++i) - res[i] = i; - - if (limit >= s) - limit = 0; - - if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); - } - else - { - if (reverse) - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - else - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - } -} - -void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const -{ - if (equal_ranges.empty()) - return; - - if (limit >= size() || limit >= equal_ranges.back().second) - limit = 0; - - size_t number_of_ranges = equal_ranges.size(); - if (limit) - --number_of_ranges; - - EqualRanges new_ranges; - SCOPE_EXIT({equal_ranges = std::move(new_ranges);}); - - for (size_t i = 0; i < number_of_ranges; ++i) - { - const auto& [first, last] = equal_ranges[i]; - - if (reverse) - std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); - else - std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); - auto new_first = first; - for (auto j = first + 1; j < last; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) - { - if (j - new_first > 1) - new_ranges.emplace_back(new_first, j); - - new_first = j; - } - } - if (last - new_first > 1) - new_ranges.emplace_back(new_first, last); - } - - if (limit) - { - const auto & [first, last] = equal_ranges.back(); - - if (limit < first || limit > last) - return; - - /// Since then we are working inside the interval. - - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); - - auto new_first = first; - for (auto j = first + 1; j < limit; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) - { - if (j - new_first > 1) - new_ranges.emplace_back(new_first, j); - - new_first = j; - } - } - auto new_last = limit; - for (auto j = limit; j < last; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0) - { - std::swap(res[new_last], res[j]); - ++new_last; - } - } - if (new_last - new_first > 1) - new_ranges.emplace_back(new_first, new_last); - } -} - void ColumnString::protect() { getChars().protect(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 19398e07b83..c91d982f126 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -42,15 +42,19 @@ private: /// Size of i-th element, including terminating zero. size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } - template - struct less; + struct cmp; - template - struct lessWithCollation; + struct cmpWithCollation; ColumnString() = default; ColumnString(const ColumnString & src); + template + void getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; + public: const char * getFamilyName() const override { return "String"; } TypeIndex getDataType() const override { return TypeIndex::String; } @@ -233,12 +237,12 @@ public: void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; + void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; /// Sorting with respect of collation. void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index c2436806fcd..5d114c746e5 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include @@ -20,6 +22,24 @@ static bool isCollationRequired(const SortColumnDescription & description) return description.collator != nullptr; } +static bool isCollationSupported(const IColumn * column) +{ + if (column->getDataType() == TypeIndex::String) + return true; + + if (column->getDataType() == TypeIndex::Nullable) + { + const ColumnNullable * column_nullable = assert_cast(column); + return isCollationSupported(&column_nullable->getNestedColumn()); + } + + if (column->getDataType() == TypeIndex::LowCardinality) + { + const ColumnLowCardinality * column_low_cardinality = assert_cast(column); + return isCollationSupported(column_low_cardinality->getDictionary().getNestedColumn().get()); + } + return false; +} ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description) { @@ -101,7 +121,6 @@ struct PartialSortingLessWithCollation } }; - void sortBlock(Block & block, const SortDescription & description, UInt64 limit) { if (!block) @@ -120,14 +139,18 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) bool is_column_const = false; if (isCollationRequired(description[0])) { - /// it it's real string column, than we need sort + /// Check if column supports collations + if (!isCollationSupported(column)) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + if (const ColumnString * column_string = checkAndGetColumn(column)) column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm); - else if (checkAndGetColumnConstData(column)) + else if (const ColumnNullable * column_nullable = checkAndGetColumn(column)) + column_nullable->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); + else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column)) + column_low_cardinality->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); + else if (isColumnConst(*column)) is_column_const = true; - else - throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); - } else if (!isColumnConst(*column)) { @@ -163,8 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) const IColumn * column = columns_with_sort_desc[i].column; if (isCollationRequired(description[i])) { - if (!checkAndGetColumn(column) && !checkAndGetColumnConstData(column)) - throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); + if (!isCollationSupported(column)) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); need_collation = true; } @@ -187,10 +210,20 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (isCollationRequired(column.description)) { - const ColumnString & column_string = assert_cast(*column.column); - column_string.updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + if (const ColumnString * column_string = checkAndGetColumn(column.column)) + column_string->updatePermutationWithCollation( + *column.description.collator, + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + + else if (const ColumnNullable * column_nullable = checkAndGetColumn(column.column)) + column_nullable->updatePermutationWithCollation( + *column.description.collator, + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + + else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column.column)) + column_low_cardinality->updatePermutationWithCollation( + *column.description.collator, + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); } else { diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference new file mode 100644 index 00000000000..b7a4830f9cf --- /dev/null +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference @@ -0,0 +1,28 @@ +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +Order by tuple without collate +1 Ё +1 а +1 ё +2 А +2 Я +2 я +Order by tuple with collate +1 а +1 ё +1 Ё +2 А +2 я +2 Я diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql new file mode 100644 index 00000000000..0f4194ee671 --- /dev/null +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test_collate; + +CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory(); + +INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'); + +SELECT 'Order by without collate'; +SELECT * FROM test_collate ORDER BY s; +SELECT 'Order by with collate'; +SELECT * FROM test_collate ORDER BY s COLLATE 'ru'; + +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; + +DROP TABLE test_collate; + diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.reference b/tests/queries/0_stateless/01533_collate_in_nullable.reference new file mode 100644 index 00000000000..6bb06cbc8b5 --- /dev/null +++ b/tests/queries/0_stateless/01533_collate_in_nullable.reference @@ -0,0 +1,36 @@ +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +1 \N +2 \N +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +1 \N +2 \N +Order by tuple without collate +1 Ё +1 а +1 ё +1 \N +2 А +2 Я +2 я +2 \N +Order by tuple with collate +1 а +1 ё +1 Ё +1 \N +2 А +2 я +2 Я +2 \N diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.sql b/tests/queries/0_stateless/01533_collate_in_nullable.sql new file mode 100644 index 00000000000..40b48bee465 --- /dev/null +++ b/tests/queries/0_stateless/01533_collate_in_nullable.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test_collate; + +CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory(); + +INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null); + +SELECT 'Order by without collate'; +SELECT * FROM test_collate ORDER BY s; +SELECT 'Order by with collate'; +SELECT * FROM test_collate ORDER BY s COLLATE 'ru'; + +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; + +DROP TABLE test_collate; + From 18d8d5d81f5844f77ba3bac972dc7f893fc014d7 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Fri, 23 Oct 2020 11:02:40 +0300 Subject: [PATCH 02/11] Add tests in arcadia_skip_list.txt --- tests/queries/0_stateless/arcadia_skip_list.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index f5b81c08520..cc03dee9eb0 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -155,5 +155,9 @@ 01509_dictionary_preallocate 01526_max_untracked_memory 01530_drop_database_atomic_sync +01532_collate_in_low_cardinality +01533_collate_in_nullable +01542_collate_in_array +01543_collate_in_tuple 01547_query_log_current_database 01548_query_log_query_execution_ms From 97a6e3dde2bb0b99a1323e05370ae07d6fc3012c Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Sat, 24 Oct 2020 20:15:03 +0300 Subject: [PATCH 03/11] Skip collate test in fasttest --- docker/test/fasttest/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 8300c31681e..f11e51dee98 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -240,6 +240,8 @@ TESTS_TO_SKIP=( 01354_order_by_tuple_collate_const 01355_ilike 01411_bayesian_ab_testing + 01532_collate_in_low_cardinality + 01533_collate_in_nullable _orc_ arrow avro From 5c296365e2c808e4ab601c389f38c7485f072e0b Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 27 Oct 2020 14:12:48 +0300 Subject: [PATCH 04/11] Fix build error --- src/Columns/ColumnString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 27dd9e54685..3093ae10646 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -288,7 +288,7 @@ struct ColumnString::cmp { const ColumnString & parent; bool reverse; - cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + explicit cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( From 4d399fff3ee8a0922ca3b33e80daf6fae5730d69 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 29 Oct 2020 14:24:01 +0300 Subject: [PATCH 05/11] Support collation for Array and Tuple --- docker/test/fasttest/run.sh | 2 + src/Columns/ColumnArray.cpp | 120 +++++++++++------- src/Columns/ColumnArray.h | 15 +++ src/Columns/ColumnLowCardinality.cpp | 33 ++--- src/Columns/ColumnLowCardinality.h | 19 ++- src/Columns/ColumnNullable.cpp | 30 +++-- src/Columns/ColumnNullable.h | 20 +-- src/Columns/ColumnString.cpp | 41 +++--- src/Columns/ColumnString.h | 20 +-- src/Columns/ColumnTuple.cpp | 89 ++++++++++--- src/Columns/ColumnTuple.h | 16 ++- src/Columns/IColumn.h | 23 +++- src/Core/SortCursor.h | 7 +- src/Interpreters/sortBlock.cpp | 56 ++------ ...01532_collate_in_low_cardinality.reference | 36 ++++++ .../01532_collate_in_low_cardinality.sql | 17 ++- .../01542_collate_in_array.reference | 50 ++++++++ .../0_stateless/01542_collate_in_array.sql | 34 +++++ .../01543_collate_in_tuple.reference | 60 +++++++++ .../0_stateless/01543_collate_in_tuple.sql | 34 +++++ 20 files changed, 524 insertions(+), 198 deletions(-) create mode 100644 tests/queries/0_stateless/01542_collate_in_array.reference create mode 100644 tests/queries/0_stateless/01542_collate_in_array.sql create mode 100644 tests/queries/0_stateless/01543_collate_in_tuple.reference create mode 100644 tests/queries/0_stateless/01543_collate_in_tuple.sql diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index f11e51dee98..ad25be9e2de 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -242,6 +242,8 @@ TESTS_TO_SKIP=( 01411_bayesian_ab_testing 01532_collate_in_low_cardinality 01533_collate_in_nullable + 01542_collate_in_array + 01543_collate_in_tuple _orc_ arrow avro diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index e4d17c586ac..c061dd50642 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n) offsets_data.resize_assume_reserved(offsets_data.size() - n); } - -int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const { const ColumnArray & rhs = assert_cast(rhs_); @@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir size_t rhs_size = rhs.sizeAt(m); size_t min_size = std::min(lhs_size, rhs_size); for (size_t i = 0; i < min_size; ++i) - if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint)) + { + int res; + if (collator) + res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator); + else + res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint); + if (res) return res; + } return lhs_size < rhs_size ? -1 @@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir : 1); } +int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs_, nan_direction_hint); +} + +int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator); +} + void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -352,27 +368,25 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -namespace -{ - template - struct Less +struct ColumnArray::Cmp { + const ColumnArray & parent; + int nan_direction_hint; + bool reverse; + const Collator * collator; + + Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) + : parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {} + + int operator()(size_t lhs, size_t rhs) const { - const ColumnArray & parent; - int nan_direction_hint; - - Less(const ColumnArray & parent_, int nan_direction_hint_) - : parent(parent_), nan_direction_hint(nan_direction_hint_) {} - - bool operator()(size_t lhs, size_t rhs) const - { - if (positive) - return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0; - else - return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0; - } - }; -} - + int res; + if (collator) + res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator); + else + res = parent.compareAt(lhs, rhs, parent, nan_direction_hint); + return reverse ? -res : res; + } +}; void ColumnArray::reserve(size_t n) { @@ -753,7 +767,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray & indexes, size_t limit INSTANTIATE_INDEX_IMPL(ColumnArray) -void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +template +void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const { size_t s = size(); if (limit >= s) @@ -763,23 +778,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h for (size_t i = 0; i < s; ++i) res[i] = i; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; + if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(*this, nan_direction_hint)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(*this, nan_direction_hint)); - } + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - { - if (reverse) - std::sort(res.begin(), res.end(), Less(*this, nan_direction_hint)); - else - std::sort(res.begin(), res.end(), Less(*this, nan_direction_hint)); - } + std::sort(res.begin(), res.end(), less); } -void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +template +void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const { if (equal_range.empty()) return; @@ -792,20 +800,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio if (limit) --number_of_ranges; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; + EqualRanges new_ranges; for (size_t i = 0; i < number_of_ranges; ++i) { const auto & [first, last] = equal_range[i]; - if (reverse) - std::sort(res.begin() + first, res.begin() + last, Less(*this, nan_direction_hint)); - else - std::sort(res.begin() + first, res.begin() + last, Less(*this, nan_direction_hint)); + std::sort(res.begin() + first, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < last; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + if (cmp(res[new_first], res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -827,14 +834,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio /// Since then we are working inside the interval. - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less(*this, nan_direction_hint)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less(*this, nan_direction_hint)); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + if (cmp(res[new_first], res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -845,7 +849,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio auto new_last = limit; for (auto j = limit; j < last; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0) + if (cmp(res[new_first], res[j]) == 0) { std::swap(res[new_last], res[j]); ++new_last; @@ -859,6 +863,26 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio equal_range = std::move(new_ranges); } +void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse)); +} + +void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse)); +} + +void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse, &collator)); +} + +void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse, &collator)); +} + ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const { if (replicate_offsets.empty()) diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index cec8387ab66..028eaba73c5 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -77,8 +77,11 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; @@ -132,6 +135,8 @@ public: return false; } + bool isCollationSupported() const override { return getData().isCollationSupported(); } + private: WrappedPtr data; WrappedPtr offsets; @@ -169,6 +174,16 @@ private: ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; + + int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const; + + template + void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const; + + struct Cmp; }; diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 2e941a3ef8a..37e97da88b9 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -15,7 +15,6 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; - extern const int BAD_COLLATION; } namespace @@ -280,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size)); } -int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const { const auto & low_cardinality_column = assert_cast(rhs); size_t n_index = getIndexes().getUInt(n); size_t m_index = low_cardinality_column.getIndexes().getUInt(m); + if (collator) + return getDictionary().getNestedColumn()->compareAtWithCollation(n_index, m_index, *low_cardinality_column.getDictionary().getNestedColumn(), nan_direction_hint, *collator); return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint); +} + +int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); +} + void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -306,12 +317,7 @@ void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int na Permutation unique_perm; if (collator) { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - column_string->getPermutationWithCollation(*collator, reverse, unique_limit, unique_perm); + getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm); } else getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); @@ -438,16 +444,11 @@ void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator); } -void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - auto comparator = [this, &column_string, &collator, reverse](size_t lhs, size_t rhs) + auto comparator = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs) { - int ret = column_string->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *column_string, collator); + int ret = getDictionary().getNestedColumn()->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *getDictionary().getNestedColumn(), nan_direction_hint, collator); return reverse ? -ret : ret; }; diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e45449873fc..0874f619b8a 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -31,11 +31,6 @@ class ColumnLowCardinality final : public COWHelper - void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; - public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. @@ -130,13 +125,15 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; - void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override; ColumnPtr replicate(const Offsets & offsets) const override { @@ -179,6 +176,7 @@ public: size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } bool isNumeric() const override { return getDictionary().isNumeric(); } bool lowCardinality() const override { return true; } + bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); } /** * Checks if the dictionary column is Nullable(T). @@ -318,6 +316,13 @@ private: void compactInplace(); void compactIfSharedDictionary(); + + int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; + + void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; }; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 63b86f38342..cbb82264694 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -18,7 +18,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; - extern const int BAD_COLLATION; } @@ -225,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const return ColumnNullable::create(indexed_data, indexed_null_map); } -int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const { /// NULL values share the properties of NaN values. /// Here the last parameter of compareAt is called null_direction_hint @@ -247,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null } const IColumn & nested_rhs = nullable_rhs.getNestedColumn(); + if (collator) + return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator); + return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +{ + return compareAtImpl(n, m, rhs_, null_direction_hint); +} + +int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs_, null_direction_hint, &collator); +} + void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -264,12 +276,7 @@ void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_dir if (collator) { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(&getNestedColumn()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - column_string->getPermutationWithCollation(*collator, reverse, 0, res); + getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res); } else getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); @@ -447,12 +454,7 @@ void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_ if (collator) { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(getNestedColumn()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - column_string->updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); + getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); } else getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 3d7a7970bd3..47b0103eab4 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -31,11 +31,6 @@ private: ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_); ColumnNullable(const ColumnNullable &) = default; - void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const; - - void updatePermutationImpl( - bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; - public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. @@ -98,11 +93,12 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; - void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const; + void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutationWithCollation( - const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const; + const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; @@ -138,6 +134,7 @@ public: bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); } bool onlyNull() const override { return nested_column->isDummy(); } + bool isCollationSupported() const override { return nested_column->isCollationSupported(); } /// Return the column that represents values. @@ -173,6 +170,13 @@ private: template void applyNullMapImpl(const ColumnUInt8 & map); + + int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const; + + void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; }; ColumnPtr makeNullable(const ColumnPtr & column); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 3093ae10646..9ea12041d85 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -284,11 +284,11 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } -struct ColumnString::cmp +struct ColumnString::Cmp { const ColumnString & parent; bool reverse; - explicit cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + explicit Cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( @@ -299,8 +299,8 @@ struct ColumnString::cmp } }; -template -void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const +template +void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const { size_t s = offsets.size(); res.resize(s); @@ -310,7 +310,7 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp compa if (limit >= s) limit = 0; - auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; if (limit) std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); @@ -318,8 +318,8 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp compa std::sort(res.begin(), res.end(), less); } -template -void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const +template +void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const { if (equal_ranges.empty()) return; @@ -334,7 +334,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR if (limit) --number_of_ranges; - auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; for (size_t i = 0; i < number_of_ranges; ++i) { @@ -344,7 +344,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR size_t new_first = first; for (size_t j = first + 1; j < last; ++j) { - if (comparator(res[j], res[new_first]) != 0) + if (cmp(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -370,7 +370,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) { - if (comparator(res[j], res[new_first]) != 0) + if (cmp(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -380,7 +380,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR size_t new_last = limit; for (size_t j = limit; j < last; ++j) { - if (comparator(res[j], res[new_first]) == 0) + if (cmp(res[j], res[new_first]) == 0) { std::swap(res[j], res[new_last]); ++new_last; @@ -393,21 +393,21 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const { - getPermutationImpl(limit, res, cmp(*this, reverse)); + getPermutationImpl(limit, res, Cmp(*this, reverse)); } void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, cmp(*this, reverse)); + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this, reverse)); } -struct ColumnString::cmpWithCollation +struct ColumnString::CmpWithCollation { const ColumnString & parent; const Collator & collator; bool reverse; - cmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} + CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { @@ -419,17 +419,16 @@ struct ColumnString::cmpWithCollation } }; -void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const +void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const { - getPermutationImpl(limit, res, cmpWithCollation(*this, collator, reverse)); + getPermutationImpl(limit, res, CmpWithCollation(*this, collator, reverse)); } void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, cmpWithCollation(*this, collator, reverse)); + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator, reverse)); } - ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const { size_t col_size = size(); @@ -498,7 +497,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - cmp cmp_op(*this); + Cmp cmp_op(*this); for (size_t i = 1; i < col_size; ++i) { @@ -513,7 +512,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const } -int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const +int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const { const ColumnString & rhs = assert_cast(rhs_); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c91d982f126..1e6f60e63b3 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -42,18 +42,18 @@ private: /// Size of i-th element, including terminating zero. size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } - struct cmp; + struct Cmp; - struct cmpWithCollation; + struct CmpWithCollation; ColumnString() = default; ColumnString(const ColumnString & src); - template - void getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const; + template + void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const; - template - void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const; public: const char * getFamilyName() const override { return "String"; } @@ -233,16 +233,16 @@ public: int direction, int nan_direction_hint) const override; /// Variant of compareAt for string comparison with respect of collation. - int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; /// Sorting with respect of collation. - void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; @@ -274,6 +274,8 @@ public: // Throws an exception if offsets/chars are messed up void validate() const; + + bool isCollationSupported() const override { return true; } }; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 98a6611edb7..f588762fb67 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se return res; } -int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) - if (int res = columns[i]->compareAt(n, m, *assert_cast(rhs).columns[i], nan_direction_hint)) + { + int res; + if (collator && columns[i]->isCollationSupported()) + res = columns[i]->compareAtWithCollation(n, m, *assert_cast(rhs).columns[i], nan_direction_hint, *collator); + else + res = columns[i]->compareAt(n, m, *assert_cast(rhs).columns[i], nan_direction_hint); + if (res) return res; - + } return 0; } +int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint); +} + void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -template +int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); +} + struct ColumnTuple::Less { TupleColumns columns; int nan_direction_hint; + bool reverse; + const Collator * collator; - Less(const TupleColumns & columns_, int nan_direction_hint_) - : columns(columns_), nan_direction_hint(nan_direction_hint_) + Less(const TupleColumns & columns_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) + : columns(columns_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) { } @@ -308,17 +325,22 @@ struct ColumnTuple::Less { for (const auto & column : columns) { - int res = column->compareAt(a, b, *column, nan_direction_hint); + int res; + if (collator && column->isCollationSupported()) + res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator); + else + res = column->compareAt(a, b, *column, nan_direction_hint); if (res < 0) - return positive; + return !reverse; else if (res > 0) - return !positive; + return reverse; } return false; } }; -void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +template +void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const { size_t rows = size(); res.resize(rows); @@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h if (limit) { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(columns, nan_direction_hint)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(columns, nan_direction_hint)); + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); } else { - if (reverse) - std::sort(res.begin(), res.end(), Less(columns, nan_direction_hint)); - else - std::sort(res.begin(), res.end(), Less(columns, nan_direction_hint)); + std::sort(res.begin(), res.end(), less); } } -void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const { if (equal_ranges.empty()) return; for (const auto & column : columns) { - column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); + if (collator && column->isCollationSupported()) + column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges); + else + column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first) equal_ranges.pop_back(); @@ -361,6 +380,26 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio } } +void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse)); +} + +void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges); +} + +void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse, &collator)); +} + +void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator); +} + void ColumnTuple::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); @@ -433,5 +472,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const return false; } +bool ColumnTuple::isCollationSupported() const +{ + for (const auto& column : columns) + { + if (column->isCollationSupported()) + return true; + } + return false; +} + } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index e8dfd4c8e44..c34768d85a4 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -20,7 +20,6 @@ private: using TupleColumns = std::vector; TupleColumns columns; - template struct Less; explicit ColumnTuple(MutableColumns && columns); @@ -75,15 +74,19 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; void protect() override; void forEachSubcolumn(ColumnCallback callback) override; bool structureEquals(const IColumn & rhs) const override; + bool isCollationSupported() const override; size_t tupleSize() const { return columns.size(); } @@ -94,6 +97,15 @@ public: Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; } const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } + +private: + int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; + + template + void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const; }; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 14e6a9d7eed..6dbcfacefe9 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -9,7 +9,7 @@ class SipHash; - +class Collator; namespace DB { @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int CANNOT_GET_SIZE_OF_FIELD; extern const int NOT_IMPLEMENTED; + extern const int BAD_COLLATION; } class Arena; @@ -250,6 +251,12 @@ public: */ virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + /// Equivalent to compareAt, but collator is used to compare values. + virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing it.", ErrorCodes::BAD_COLLATION); + } + /// Compare the whole column with single value from rhs column. /// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare. /// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction @@ -277,6 +284,18 @@ public: */ virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0; + /** Equivalent to getPermutation and updatePermutation but collator is used to compare values. + * Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them. + */ + virtual void getPermutationWithCollation(const Collator &, bool, size_t, int, Permutation &) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + virtual void updatePermutationWithCollation(const Collator &, bool, size_t, int, Permutation &, EqualRanges&) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + /** Copies each element according offsets parameter. * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. @@ -402,6 +421,8 @@ public: virtual bool lowCardinality() const { return false; } + virtual bool isCollationSupported() const { return false; } + virtual ~IColumn() = default; IColumn() = default; IColumn(const IColumn &) = default; diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index 4c90cc723bf..7a222f70199 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -96,7 +96,7 @@ struct SortCursorImpl : column_desc.column_number; sort_columns.push_back(columns[column_number].get()); - need_collation[j] = desc[j].collator != nullptr && typeid_cast(sort_columns.back()); /// TODO Nullable(String) + need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String) has_collation |= need_collation[j]; } @@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper int nulls_direction = desc.nulls_direction; int res; if (impl->need_collation[i]) - { - const ColumnString & column_string = assert_cast(*impl->sort_columns[i]); - res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator); - } + res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); else res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction); diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 5d114c746e5..edf911fa61c 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -22,24 +22,6 @@ static bool isCollationRequired(const SortColumnDescription & description) return description.collator != nullptr; } -static bool isCollationSupported(const IColumn * column) -{ - if (column->getDataType() == TypeIndex::String) - return true; - - if (column->getDataType() == TypeIndex::Nullable) - { - const ColumnNullable * column_nullable = assert_cast(column); - return isCollationSupported(&column_nullable->getNestedColumn()); - } - - if (column->getDataType() == TypeIndex::LowCardinality) - { - const ColumnLowCardinality * column_low_cardinality = assert_cast(column); - return isCollationSupported(column_low_cardinality->getDictionary().getNestedColumn().get()); - } - return false; -} ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description) { @@ -106,8 +88,7 @@ struct PartialSortingLessWithCollation } else if (isCollationRequired(elem.description)) { - const ColumnString & column_string = assert_cast(*elem.column); - res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator); + res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator); } else res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); @@ -139,18 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) bool is_column_const = false; if (isCollationRequired(description[0])) { - /// Check if column supports collations - if (!isCollationSupported(column)) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + if (!column->isCollationSupported()) + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); - if (const ColumnString * column_string = checkAndGetColumn(column)) - column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm); - else if (const ColumnNullable * column_nullable = checkAndGetColumn(column)) - column_nullable->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); - else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column)) - column_low_cardinality->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); - else if (isColumnConst(*column)) + if (isColumnConst(*column)) is_column_const = true; + else + column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); } else if (!isColumnConst(*column)) { @@ -186,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) const IColumn * column = columns_with_sort_desc[i].column; if (isCollationRequired(description[i])) { - if (!isCollationSupported(column)) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + if (!column->isCollationSupported()) + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); need_collation = true; } @@ -210,20 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (isCollationRequired(column.description)) { - if (const ColumnString * column_string = checkAndGetColumn(column.column)) - column_string->updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); - - else if (const ColumnNullable * column_nullable = checkAndGetColumn(column.column)) - column_nullable->updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); - - else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column.column)) - column_low_cardinality->updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + column.column->updatePermutationWithCollation( + *column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); } else { diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference index b7a4830f9cf..fbffea8df5a 100644 --- a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference @@ -26,3 +26,39 @@ Order by tuple with collate 2 А 2 я 2 Я +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +1 \N +2 \N +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +1 \N +2 \N +Order by tuple without collate +1 Ё +1 а +1 ё +1 \N +2 А +2 Я +2 я +2 \N +Order by tuple with collate +1 а +1 ё +1 Ё +1 \N +2 А +2 я +2 Я +2 \N diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql index 0f4194ee671..b6fba26eb2d 100644 --- a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql @@ -1,8 +1,12 @@ DROP TABLE IF EXISTS test_collate; +DROP TABLE IF EXISTS test_collate_null; CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory(); +CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory(); INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'); +INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null); + SELECT 'Order by without collate'; SELECT * FROM test_collate ORDER BY s; @@ -14,5 +18,16 @@ SELECT * FROM test_collate ORDER BY x, s; SELECT 'Order by tuple with collate'; SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; -DROP TABLE test_collate; +SELECT 'Order by without collate'; +SELECT * FROM test_collate_null ORDER BY s; +SELECT 'Order by with collate'; +SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru'; +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate_null ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru'; + + +DROP TABLE test_collate; +DROP TABLE test_collate_null; diff --git a/tests/queries/0_stateless/01542_collate_in_array.reference b/tests/queries/0_stateless/01542_collate_in_array.reference new file mode 100644 index 00000000000..2c5a23066f3 --- /dev/null +++ b/tests/queries/0_stateless/01542_collate_in_array.reference @@ -0,0 +1,50 @@ +1 ['а'] +2 ['А'] +1 ['ё'] +1 ['ё','а'] +2 ['ё','а','а'] +1 ['ё','я'] +1 ['Ё'] +2 ['я','а'] +2 ['Я'] + +1 ['а'] +1 ['ё'] +1 ['ё','а'] +1 ['ё','я'] +1 ['Ё'] +2 ['А'] +2 ['ё','а','а'] +2 ['я','а'] +2 ['Я'] + +1 ['а'] +2 ['А'] +1 ['ё'] +1 ['ё','а'] +2 ['ё','а','а',NULL] +1 ['ё',NULL,'я'] +1 ['Ё'] +2 ['я'] +2 [NULL,'Я'] + +1 ['а'] +1 ['ё'] +1 ['ё','а'] +1 ['ё',NULL,'я'] +1 ['Ё'] +2 ['А'] +2 ['ё','а','а',NULL] +2 ['я'] +2 [NULL,'Я'] + +2 [['а','а'],['я','ё']] +1 [['а','Ё'],['ё','я']] +1 [['а','я'],['а','ё']] +2 [['ё']] + +1 [['а','Ё'],['ё','я']] +1 [['а','я'],['а','ё']] +2 [['а','а'],['я','ё']] +2 [['ё']] + diff --git a/tests/queries/0_stateless/01542_collate_in_array.sql b/tests/queries/0_stateless/01542_collate_in_array.sql new file mode 100644 index 00000000000..dd0ec769e7d --- /dev/null +++ b/tests/queries/0_stateless/01542_collate_in_array.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS collate_test1; +DROP TABLE IF EXISTS collate_test2; +DROP TABLE IF EXISTS collate_test3; + +CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory(); +CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory(); +CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory(); + +INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 'я']), (2, ['ё', 'а', 'а']); +INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]); +INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]); + +SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +DROP TABLE collate_test1; +DROP TABLE collate_test2; +DROP TABLE collate_test3; + diff --git a/tests/queries/0_stateless/01543_collate_in_tuple.reference b/tests/queries/0_stateless/01543_collate_in_tuple.reference new file mode 100644 index 00000000000..fe8f935f0a6 --- /dev/null +++ b/tests/queries/0_stateless/01543_collate_in_tuple.reference @@ -0,0 +1,60 @@ +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +2 (1,'я') +1 (2,'а') +2 (2,'А') +2 (2,'Я') +1 (3,'я') + +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +1 (2,'а') +1 (3,'я') +2 (1,'я') +2 (2,'А') +2 (2,'Я') + +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +2 (1,'я') +1 (1,NULL) +2 (2,'А') +2 (2,'Я') +1 (2,NULL) +2 (2,NULL) +1 (3,'я') + +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +1 (1,NULL) +1 (2,NULL) +1 (3,'я') +2 (1,'я') +2 (2,'А') +2 (2,'Я') +2 (2,NULL) + +2 (1,(1,['А'])) +2 (1,(1,['ё','а','а'])) +1 (1,(1,['Ё'])) +2 (1,(1,['Я'])) +1 (1,(2,['а'])) +1 (1,(2,['ё','я'])) +1 (2,(1,['ё'])) +1 (2,(1,['ё','а'])) +2 (2,(1,['я'])) + +1 (1,(1,['Ё'])) +1 (1,(2,['а'])) +1 (1,(2,['ё','я'])) +1 (2,(1,['ё'])) +1 (2,(1,['ё','а'])) +2 (1,(1,['А'])) +2 (1,(1,['ё','а','а'])) +2 (1,(1,['Я'])) +2 (2,(1,['я'])) + diff --git a/tests/queries/0_stateless/01543_collate_in_tuple.sql b/tests/queries/0_stateless/01543_collate_in_tuple.sql new file mode 100644 index 00000000000..17d9426cf45 --- /dev/null +++ b/tests/queries/0_stateless/01543_collate_in_tuple.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS collate_test1; +DROP TABLE IF EXISTS collate_test2; +DROP TABLE IF EXISTS collate_test3; + +CREATE TABLE collate_test1 (x UInt32, s Tuple(UInt32, String)) ENGINE=Memory(); +CREATE TABLE collate_test2 (x UInt32, s Tuple(UInt32, LowCardinality(Nullable(String)))) ENGINE=Memory(); +CREATE TABLE collate_test3 (x UInt32, s Tuple(UInt32, Tuple(UInt32, Array(String)))) ENGINE=Memory(); + +INSERT INTO collate_test1 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2,'а')), (1, (3, 'я')); +INSERT INTO collate_test2 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2, null)), (1, (3, 'я')), (1, (1, null)), (2, (2, null)); +INSERT INTO collate_test3 VALUES (1, (1, (1, ['Ё']))), (1, (2, (1, ['ё']))), (1, (1, (2, ['а']))), (2, (1, (1, ['А']))), (2, (2, (1, ['я']))), (2, (1, (1, ['Я']))), (1, (2, (1, ['ё','а']))), (1, (1, (2, ['ё', 'я']))), (2, (1, (1, ['ё', 'а', 'а']))); + +SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +DROP TABLE collate_test1; +DROP TABLE collate_test2; +DROP TABLE collate_test3; + From cf3f39ed29bf5a127c1dcc5c97ded68fb75befc8 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 29 Oct 2020 14:37:00 +0300 Subject: [PATCH 06/11] Small changes --- src/Columns/ColumnLowCardinality.cpp | 2 -- src/Columns/ColumnNullable.cpp | 4 ---- 2 files changed, 6 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 37e97da88b9..3f03734b738 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -316,9 +316,7 @@ void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int na size_t unique_limit = getDictionary().size(); Permutation unique_perm; if (collator) - { getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm); - } else getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index cbb82264694..4f2117b1405 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -275,9 +275,7 @@ void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_dir /// Cannot pass limit because of unknown amount of NULLs. if (collator) - { getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res); - } else getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); @@ -453,9 +451,7 @@ void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_ } if (collator) - { getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); - } else getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); From bcd660bb57862b2aae0572518c1ecde2be59c21b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Oct 2020 08:35:18 +0300 Subject: [PATCH 07/11] Minor fixes --- src/Columns/ColumnArray.cpp | 5 +++-- src/Columns/ColumnConst.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index c061dd50642..b420d337701 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -368,13 +368,14 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -struct ColumnArray::Cmp { +struct ColumnArray::Cmp +{ const ColumnArray & parent; int nan_direction_hint; bool reverse; const Collator * collator; - Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) + Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_ = false, const Collator * collator_ = nullptr) : parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {} int operator()(size_t lhs, size_t rhs) const diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 4942d27b6c9..d7a8842bf01 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -248,6 +248,8 @@ public: /// The constant value. It is valid even if the size of the column is 0. template T getValue() const { return getField().safeGet>(); } + + bool isCollationSupported() const override { return true; } }; } From cd86f98aec6ff47185d6972d3509e508db4fdd3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Oct 2020 08:36:27 +0300 Subject: [PATCH 08/11] Minor fixes --- src/Columns/ColumnConst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index d7a8842bf01..3680926cd9b 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -249,7 +249,7 @@ public: template T getValue() const { return getField().safeGet>(); } - bool isCollationSupported() const override { return true; } + bool isCollationSupported() const override { return data->isCollationSupported(); } }; } From 9868b58531b7ce726e11e71a0b0b068cbb73cd06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 31 Oct 2020 03:59:58 +0300 Subject: [PATCH 09/11] Minor change --- src/Columns/ColumnString.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 9ea12041d85..477c098f067 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -407,7 +407,8 @@ struct ColumnString::CmpWithCollation const Collator & collator; bool reverse; - CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} + CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_ = false) + : parent(parent_), collator(collator_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { From 822bbcfdba6544dbcddd2f6985b92a3827504862 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 3 Nov 2020 17:25:52 +0300 Subject: [PATCH 10/11] Make Cmp with template --- src/Columns/ColumnArray.cpp | 32 ++++++++++++++++++++++---------- src/Columns/ColumnArray.h | 1 + src/Columns/ColumnString.cpp | 35 +++++++++++++++++++++++------------ src/Columns/ColumnString.h | 2 ++ src/Columns/ColumnTuple.cpp | 20 +++++++++++++------- src/Columns/ColumnTuple.h | 1 + 6 files changed, 62 insertions(+), 29 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index b420d337701..f03a51e0681 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -368,15 +368,14 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -struct ColumnArray::Cmp -{ +template +struct ColumnArray::Cmp { const ColumnArray & parent; int nan_direction_hint; - bool reverse; const Collator * collator; - Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_ = false, const Collator * collator_ = nullptr) - : parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {} + Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr) + : parent(parent_), nan_direction_hint(nan_direction_hint_), collator(collator_) {} int operator()(size_t lhs, size_t rhs) const { @@ -385,7 +384,7 @@ struct ColumnArray::Cmp res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator); else res = parent.compareAt(lhs, rhs, parent, nan_direction_hint); - return reverse ? -res : res; + return positive ? res : -res; } }; @@ -866,22 +865,35 @@ void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRa void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse)); + if (reverse) + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint)); + else + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint)); + } void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const { - updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse)); + if (reverse) + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint)); + else + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint)); } void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse, &collator)); + if (reverse) + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, &collator)); + else + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, &collator)); } void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const { - updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse, &collator)); + if (reverse) + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, &collator)); + else + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, &collator)); } ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 028eaba73c5..8a02af92dce 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -183,6 +183,7 @@ private: template void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const; + template struct Cmp; }; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 477c098f067..23798f64a9c 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -284,18 +284,18 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } +template struct ColumnString::Cmp { const ColumnString & parent; - bool reverse; - explicit Cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + explicit Cmp(const ColumnString & parent_) : parent(parent_) {} int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1, parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1); - return reverse ? -res : res; + return positive ? res : -res; } }; @@ -393,22 +393,27 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const { - getPermutationImpl(limit, res, Cmp(*this, reverse)); + if (reverse) + getPermutationImpl(limit, res, Cmp(*this)); + else + getPermutationImpl(limit, res, Cmp(*this)); } void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, Cmp(*this, reverse)); + if (reverse) + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this)); + else + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this)); } +template struct ColumnString::CmpWithCollation { const ColumnString & parent; const Collator & collator; - bool reverse; - CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_ = false) - : parent(parent_), collator(collator_), reverse(reverse_) {} + CmpWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {} int operator()(size_t lhs, size_t rhs) const { @@ -416,18 +421,24 @@ struct ColumnString::CmpWithCollation reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); - return reverse ? -res : res; + return positive ? res : -res; } }; void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const { - getPermutationImpl(limit, res, CmpWithCollation(*this, collator, reverse)); + if (reverse) + getPermutationImpl(limit, res, CmpWithCollation(*this, collator)); + else + getPermutationImpl(limit, res, CmpWithCollation(*this, collator)); } void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator, reverse)); + if (reverse) + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator)); + else + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator)); } ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const @@ -498,7 +509,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - Cmp cmp_op(*this); + Cmp cmp_op(*this); for (size_t i = 1; i < col_size; ++i) { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 1e6f60e63b3..b71751dbc4e 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -42,8 +42,10 @@ private: /// Size of i-th element, including terminating zero. size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } + template struct Cmp; + template struct CmpWithCollation; ColumnString() = default; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index f588762fb67..d6e1ca982d6 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -309,15 +309,15 @@ int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); } +template struct ColumnTuple::Less { TupleColumns columns; int nan_direction_hint; - bool reverse; const Collator * collator; - Less(const TupleColumns & columns_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) - : columns(columns_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) + Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr) + : columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_) { } @@ -331,9 +331,9 @@ struct ColumnTuple::Less else res = column->compareAt(a, b, *column, nan_direction_hint); if (res < 0) - return !reverse; + return positive; else if (res > 0) - return reverse; + return !positive; } return false; } @@ -382,7 +382,10 @@ void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_dire void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse)); + if (reverse) + getPermutationImpl(limit, res, Less(columns, nan_direction_hint)); + else + getPermutationImpl(limit, res, Less(columns, nan_direction_hint)); } void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const @@ -392,7 +395,10 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse, &collator)); + if (reverse) + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, &collator)); + else + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, &collator)); } void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index c34768d85a4..0bee3463f2f 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -20,6 +20,7 @@ private: using TupleColumns = std::vector; TupleColumns columns; + template struct Less; explicit ColumnTuple(MutableColumns && columns); From b67465b010048d2c46cb0cf19e16aae9b13035fc Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 3 Nov 2020 18:00:51 +0300 Subject: [PATCH 11/11] Fix style --- src/Columns/ColumnArray.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index f03a51e0681..9b948236943 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -369,7 +369,8 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, } template -struct ColumnArray::Cmp { +struct ColumnArray::Cmp +{ const ColumnArray & parent; int nan_direction_hint; const Collator * collator;