Support collation for Array and Tuple

This commit is contained in:
Pavel Kruglov 2020-10-29 14:24:01 +03:00
parent 5c296365e2
commit 4d399fff3e
20 changed files with 524 additions and 198 deletions

View File

@ -242,6 +242,8 @@ TESTS_TO_SKIP=(
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
_orc_
arrow
avro

View File

@ -324,8 +324,7 @@ void ColumnArray::popBack(size_t n)
offsets_data.resize_assume_reserved(offsets_data.size() - n);
}
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const
{
const ColumnArray & rhs = assert_cast<const ColumnArray &>(rhs_);
@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
size_t rhs_size = rhs.sizeAt(m);
size_t min_size = std::min(lhs_size, rhs_size);
for (size_t i = 0; i < min_size; ++i)
if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint))
{
int res;
if (collator)
res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator);
else
res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint);
if (res)
return res;
}
return lhs_size < rhs_size
? -1
@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir
: 1);
}
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
{
    /// Collation-free comparison: hand the shared implementation an explicit null collator.
    return compareAtImpl(n, m, rhs_, nan_direction_hint, nullptr);
}
int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
}
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -352,27 +368,25 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
namespace
{
template <bool positive>
struct Less
struct ColumnArray::Cmp {
const ColumnArray & parent;
int nan_direction_hint;
bool reverse;
const Collator * collator;
Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr)
: parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {}
int operator()(size_t lhs, size_t rhs) const
{
const ColumnArray & parent;
int nan_direction_hint;
Less(const ColumnArray & parent_, int nan_direction_hint_)
: parent(parent_), nan_direction_hint(nan_direction_hint_) {}
bool operator()(size_t lhs, size_t rhs) const
{
if (positive)
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0;
else
return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0;
}
};
}
int res;
if (collator)
res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator);
else
res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
return reverse ? -res : res;
}
};
void ColumnArray::reserve(size_t n)
{
@ -753,7 +767,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
INSTANTIATE_INDEX_IMPL(ColumnArray)
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
template <typename Comparator>
void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
size_t s = size();
if (limit >= s)
@ -763,23 +778,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h
for (size_t i = 0; i < s; ++i)
res[i] = i;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(*this, nan_direction_hint));
}
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
else
{
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(*this, nan_direction_hint));
}
std::sort(res.begin(), res.end(), less);
}
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
template <typename Comparator>
void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const
{
if (equal_range.empty())
return;
@ -792,20 +800,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
if (limit)
--number_of_ranges;
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
EqualRanges new_ranges;
for (size_t i = 0; i < number_of_ranges; ++i)
{
const auto & [first, last] = equal_range[i];
if (reverse)
std::sort(res.begin() + first, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::sort(res.begin() + first, res.begin() + last, Less<true>(*this, nan_direction_hint));
std::sort(res.begin() + first, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (cmp(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -827,14 +834,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
/// Since then we are working inside the interval.
if (reverse)
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<false>(*this, nan_direction_hint));
else
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less<true>(*this, nan_direction_hint));
std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less);
auto new_first = first;
for (auto j = first + 1; j < limit; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0)
if (cmp(res[new_first], res[j]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -845,7 +849,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
auto new_last = limit;
for (auto j = limit; j < last; ++j)
{
if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0)
if (cmp(res[new_first], res[j]) == 0)
{
std::swap(res[new_last], res[j]);
++new_last;
@ -859,6 +863,26 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio
equal_range = std::move(new_ranges);
}
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse));
}
void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse));
}
void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse, &collator));
}
void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const
{
updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse, &collator));
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
if (replicate_offsets.empty())

View File

@ -77,8 +77,11 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
@ -132,6 +135,8 @@ public:
return false;
}
bool isCollationSupported() const override { return getData().isCollationSupported(); }
private:
WrappedPtr data;
WrappedPtr offsets;
@ -169,6 +174,16 @@ private:
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
struct Cmp;
};

View File

@ -15,7 +15,6 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int BAD_COLLATION;
}
namespace
@ -280,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size));
}
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
    const auto & other = assert_cast<const ColumnLowCardinality &>(rhs);

    /// Read the index values stored for row n of this column and row m of the other one.
    const size_t lhs_position = getIndexes().getUInt(n);
    const size_t rhs_position = other.getIndexes().getUInt(m);

    /// Without a collator the dictionaries are compared directly.
    if (!collator)
        return getDictionary().compareAt(lhs_position, rhs_position, other.getDictionary(), nan_direction_hint);

    /// With a collator the comparison is delegated to the dictionaries' nested columns.
    const auto & lhs_nested = getDictionary().getNestedColumn();
    const auto & rhs_nested = other.getDictionary().getNestedColumn();
    return lhs_nested->compareAtWithCollation(lhs_position, rhs_position, *rhs_nested, nan_direction_hint, *collator);
}
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    /// Collation-free comparison: pass an explicit null collator to the shared implementation.
    return compareAtImpl(n, m, rhs, nan_direction_hint, nullptr);
}
int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -306,12 +317,7 @@ void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int na
Permutation unique_perm;
if (collator)
{
/// Collations are supported only for ColumnString
const ColumnString * column_string = checkAndGetColumn<ColumnString>(getDictionary().getNestedColumn().get());
if (!column_string)
throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION);
column_string->getPermutationWithCollation(*collator, reverse, unique_limit, unique_perm);
getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm);
}
else
getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm);
@ -438,16 +444,11 @@ void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator
getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator);
}
void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
/// Collations are supported only for ColumnString
const ColumnString * column_string = checkAndGetColumn<ColumnString>(getDictionary().getNestedColumn().get());
if (!column_string)
throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION);
auto comparator = [this, &column_string, &collator, reverse](size_t lhs, size_t rhs)
auto comparator = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs)
{
int ret = column_string->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *column_string, collator);
int ret = getDictionary().getNestedColumn()->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *getDictionary().getNestedColumn(), nan_direction_hint, collator);
return reverse ? -ret : ret;
};

View File

@ -31,11 +31,6 @@ class ColumnLowCardinality final : public COWHelper<IColumn, ColumnLowCardinalit
ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
ColumnLowCardinality(const ColumnLowCardinality & other) = default;
void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
template <typename Cmp>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
@ -130,13 +125,15 @@ public:
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
ColumnPtr replicate(const Offsets & offsets) const override
{
@ -179,6 +176,7 @@ public:
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
bool isNumeric() const override { return getDictionary().isNumeric(); }
bool lowCardinality() const override { return true; }
bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); }
/**
* Checks if the dictionary column is Nullable(T).
@ -318,6 +316,13 @@ private:
void compactInplace();
void compactIfSharedDictionary();
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
template <typename Cmp>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
};

View File

@ -18,7 +18,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT;
extern const int BAD_COLLATION;
}
@ -225,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
return ColumnNullable::create(indexed_data, indexed_null_map);
}
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const
{
/// NULL values share the properties of NaN values.
/// Here the last parameter of compareAt is called null_direction_hint
@ -247,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null
}
const IColumn & nested_rhs = nullable_rhs.getNestedColumn();
if (collator)
return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator);
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
{
    /// Collation-free comparison: pass an explicit null collator to the shared implementation.
    return compareAtImpl(n, m, rhs_, null_direction_hint, nullptr);
}
int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
}
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -264,12 +276,7 @@ void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_dir
if (collator)
{
/// Collations are supported only for ColumnString
const ColumnString * column_string = checkAndGetColumn<ColumnString>(&getNestedColumn());
if (!column_string)
throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION);
column_string->getPermutationWithCollation(*collator, reverse, 0, res);
getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res);
}
else
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
@ -447,12 +454,7 @@ void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_
if (collator)
{
/// Collations are supported only for ColumnString
const ColumnString * column_string = checkAndGetColumn<ColumnString>(getNestedColumn());
if (!column_string)
throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION);
column_string->updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges);
getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges);
}
else
getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges);

View File

@ -31,11 +31,6 @@ private:
ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_);
ColumnNullable(const ColumnNullable &) = default;
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
void updatePermutationImpl(
bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
@ -98,11 +93,12 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const;
void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const;
const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
@ -138,6 +134,7 @@ public:
bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); }
bool onlyNull() const override { return nested_column->isDummy(); }
bool isCollationSupported() const override { return nested_column->isCollationSupported(); }
/// Return the column that represents values.
@ -173,6 +170,13 @@ private:
template <bool negative>
void applyNullMapImpl(const ColumnUInt8 & map);
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const;
void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const;
void updatePermutationImpl(
bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const;
};
ColumnPtr makeNullable(const ColumnPtr & column);

View File

@ -284,11 +284,11 @@ void ColumnString::compareColumn(
compare_results, direction, nan_direction_hint);
}
struct ColumnString::cmp
struct ColumnString::Cmp
{
const ColumnString & parent;
bool reverse;
explicit cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {}
explicit Cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {}
int operator()(size_t lhs, size_t rhs) const
{
int res = memcmpSmallAllowOverflow15(
@ -299,8 +299,8 @@ struct ColumnString::cmp
}
};
template <typename Cmp>
void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const
template <typename Comparator>
void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const
{
size_t s = offsets.size();
res.resize(s);
@ -310,7 +310,7 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp compa
if (limit >= s)
limit = 0;
auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
if (limit)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
@ -318,8 +318,8 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp compa
std::sort(res.begin(), res.end(), less);
}
template <typename Cmp>
void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const
template <typename Comparator>
void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const
{
if (equal_ranges.empty())
return;
@ -334,7 +334,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR
if (limit)
--number_of_ranges;
auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; };
auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; };
for (size_t i = 0; i < number_of_ranges; ++i)
{
@ -344,7 +344,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR
size_t new_first = first;
for (size_t j = first + 1; j < last; ++j)
{
if (comparator(res[j], res[new_first]) != 0)
if (cmp(res[j], res[new_first]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -370,7 +370,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR
size_t new_first = first;
for (size_t j = first + 1; j < limit; ++j)
{
if (comparator(res[j], res[new_first]) != 0)
if (cmp(res[j], res[new_first]) != 0)
{
if (j - new_first > 1)
new_ranges.emplace_back(new_first, j);
@ -380,7 +380,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR
size_t new_last = limit;
for (size_t j = limit; j < last; ++j)
{
if (comparator(res[j], res[new_first]) == 0)
if (cmp(res[j], res[new_first]) == 0)
{
std::swap(res[j], res[new_last]);
++new_last;
@ -393,21 +393,21 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
{
getPermutationImpl(limit, res, cmp(*this, reverse));
getPermutationImpl(limit, res, Cmp(*this, reverse));
}
void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(limit, res, equal_ranges, cmp(*this, reverse));
updatePermutationImpl(limit, res, equal_ranges, Cmp(*this, reverse));
}
struct ColumnString::cmpWithCollation
struct ColumnString::CmpWithCollation
{
const ColumnString & parent;
const Collator & collator;
bool reverse;
cmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {}
CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {}
int operator()(size_t lhs, size_t rhs) const
{
@ -419,17 +419,16 @@ struct ColumnString::cmpWithCollation
}
};
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const
{
getPermutationImpl(limit, res, cmpWithCollation(*this, collator, reverse));
getPermutationImpl(limit, res, CmpWithCollation(*this, collator, reverse));
}
void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(limit, res, equal_ranges, cmpWithCollation(*this, collator, reverse));
updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator, reverse));
}
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
{
size_t col_size = size();
@ -498,7 +497,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const
size_t min_idx = 0;
size_t max_idx = 0;
cmp cmp_op(*this);
Cmp cmp_op(*this);
for (size_t i = 1; i < col_size; ++i)
{
@ -513,7 +512,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const
}
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
{
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);

View File

@ -42,18 +42,18 @@ private:
/// Size of i-th element, including terminating zero.
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
struct cmp;
struct Cmp;
struct cmpWithCollation;
struct CmpWithCollation;
ColumnString() = default;
ColumnString(const ColumnString & src);
template <typename Cmp>
void getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const;
template <typename Comparator>
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
template <typename Cmp>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const;
template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const;
public:
const char * getFamilyName() const override { return "String"; }
@ -233,16 +233,16 @@ public:
int direction, int nan_direction_hint) const override;
/// Variant of compareAt for string comparison with respect to collation.
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
/// Sorting with respect to collation.
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
@ -274,6 +274,8 @@ public:
// Throws an exception if offsets/chars are messed up
void validate() const;
bool isCollationSupported() const override { return true; }
};

View File

@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se
return res;
}
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const
{
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
if (int res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint))
{
int res;
if (collator && columns[i]->isCollationSupported())
res = columns[i]->compareAtWithCollation(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint, *collator);
else
res = columns[i]->compareAt(n, m, *assert_cast<const ColumnTuple &>(rhs).columns[i], nan_direction_hint);
if (res)
return res;
}
return 0;
}
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
    /// Comparison without a collator argument; the work is done by the shared implementation.
    const int comparison = compareAtImpl(n, m, rhs, nan_direction_hint);
    return comparison;
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
template <bool positive>
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
struct ColumnTuple::Less
{
TupleColumns columns;
int nan_direction_hint;
bool reverse;
const Collator * collator;
Less(const TupleColumns & columns_, int nan_direction_hint_)
: columns(columns_), nan_direction_hint(nan_direction_hint_)
Less(const TupleColumns & columns_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr)
: columns(columns_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_)
{
}
@ -308,17 +325,22 @@ struct ColumnTuple::Less
{
for (const auto & column : columns)
{
int res = column->compareAt(a, b, *column, nan_direction_hint);
int res;
if (collator && column->isCollationSupported())
res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator);
else
res = column->compareAt(a, b, *column, nan_direction_hint);
if (res < 0)
return positive;
return !reverse;
else if (res > 0)
return !positive;
return reverse;
}
return false;
}
};
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
template <typename LessOperator>
void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const
{
size_t rows = size();
res.resize(rows);
@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h
if (limit)
{
if (reverse)
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<false>(columns, nan_direction_hint));
else
std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less<true>(columns, nan_direction_hint));
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less);
}
else
{
if (reverse)
std::sort(res.begin(), res.end(), Less<false>(columns, nan_direction_hint));
else
std::sort(res.begin(), res.end(), Less<true>(columns, nan_direction_hint));
std::sort(res.begin(), res.end(), less);
}
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const
{
if (equal_ranges.empty())
return;
for (const auto & column : columns)
{
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
if (collator && column->isCollationSupported())
column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges);
else
column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges);
while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first)
equal_ranges.pop_back();
@ -361,6 +380,26 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio
}
}
/// Fills res with a sorting permutation of the rows, using a Less comparator built without a collator.
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
    getPermutationImpl(
        limit,
        res,
        Less(columns, nan_direction_hint, reverse, /* collator_ = */ nullptr));
}
void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges);
}
void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse, &collator));
}
void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const
{
updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator);
}
void ColumnTuple::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
@ -433,5 +472,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
return false;
}
bool ColumnTuple::isCollationSupported() const
{
for (const auto& column : columns)
{
if (column->isCollationSupported())
return true;
}
return false;
}
}

View File

@ -20,7 +20,6 @@ private:
using TupleColumns = std::vector<WrappedPtr>;
TupleColumns columns;
template <bool positive>
struct Less;
explicit ColumnTuple(MutableColumns && columns);
@ -75,15 +74,19 @@ public:
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override;
void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
void reserve(size_t n) override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
size_t tupleSize() const { return columns.size(); }
@ -94,6 +97,15 @@ public:
Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }
const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
private:
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
template <typename LessOperator>
void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const;
void updatePermutationImpl(
bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const;
};

View File

@ -9,7 +9,7 @@
class SipHash;
class Collator;
namespace DB
{
@ -18,6 +18,7 @@ namespace ErrorCodes
{
extern const int CANNOT_GET_SIZE_OF_FIELD;
extern const int NOT_IMPLEMENTED;
extern const int BAD_COLLATION;
}
class Arena;
@ -250,6 +251,12 @@ public:
*/
virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
/// Equivalent to compareAt, but collator is used to compare values.
/// This default implementation always throws BAD_COLLATION;
/// column types that can be compared with a collator override it.
virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const
{
    /// Message wording kept identical to the other BAD_COLLATION errors ("containing them").
    throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/// Compare the whole column with single value from rhs column.
/// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare.
/// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction
@ -277,6 +284,18 @@ public:
*/
virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0;
/** Equivalent to getPermutation and updatePermutation but collator is used to compare values.
* Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them.
* This default implementation always throws BAD_COLLATION; column types that support collation override it.
*/
virtual void getPermutationWithCollation(const Collator &, bool, size_t, int, Permutation &) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/// Collation-aware counterpart of updatePermutation.
/// This default implementation always throws BAD_COLLATION; column types that support collation override it.
virtual void updatePermutationWithCollation(const Collator &, bool, size_t, int, Permutation &, EqualRanges&) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
}
/** Copies each element according offsets parameter.
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
* It is necessary in ARRAY JOIN operation.
@ -402,6 +421,8 @@ public:
virtual bool lowCardinality() const { return false; }
/// Whether the *WithCollation methods may be used on this column.
/// False by default; column types that support collation override it to return true.
virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
IColumn() = default;
IColumn(const IColumn &) = default;

View File

@ -96,7 +96,7 @@ struct SortCursorImpl
: column_desc.column_number;
sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && typeid_cast<const ColumnString *>(sort_columns.back()); /// TODO Nullable(String)
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String)
has_collation |= need_collation[j];
}
@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
int nulls_direction = desc.nulls_direction;
int res;
if (impl->need_collation[i])
{
const ColumnString & column_string = assert_cast<const ColumnString &>(*impl->sort_columns[i]);
res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator);
}
res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator);
else
res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction);

View File

@ -22,24 +22,6 @@ static bool isCollationRequired(const SortColumnDescription & description)
return description.collator != nullptr;
}
static bool isCollationSupported(const IColumn * column)
{
if (column->getDataType() == TypeIndex::String)
return true;
if (column->getDataType() == TypeIndex::Nullable)
{
const ColumnNullable * column_nullable = assert_cast<const ColumnNullable *>(column);
return isCollationSupported(&column_nullable->getNestedColumn());
}
if (column->getDataType() == TypeIndex::LowCardinality)
{
const ColumnLowCardinality * column_low_cardinality = assert_cast<const ColumnLowCardinality *>(column);
return isCollationSupported(column_low_cardinality->getDictionary().getNestedColumn().get());
}
return false;
}
ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description)
{
@ -106,8 +88,7 @@ struct PartialSortingLessWithCollation
}
else if (isCollationRequired(elem.description))
{
const ColumnString & column_string = assert_cast<const ColumnString &>(*elem.column);
res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator);
res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator);
}
else
res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction);
@ -139,18 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
bool is_column_const = false;
if (isCollationRequired(description[0]))
{
/// Check if column supports collations
if (!isCollationSupported(column))
throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION);
if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column))
column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm);
else if (const ColumnNullable * column_nullable = checkAndGetColumn<ColumnNullable>(column))
column_nullable->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn<ColumnLowCardinality>(column))
column_low_cardinality->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
else if (isColumnConst(*column))
if (isColumnConst(*column))
is_column_const = true;
else
column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm);
}
else if (!isColumnConst(*column))
{
@ -186,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
const IColumn * column = columns_with_sort_desc[i].column;
if (isCollationRequired(description[i]))
{
if (!isCollationSupported(column))
throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION);
if (!column->isCollationSupported())
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION);
need_collation = true;
}
@ -210,20 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
if (isCollationRequired(column.description))
{
if (const ColumnString * column_string = checkAndGetColumn<ColumnString>(column.column))
column_string->updatePermutationWithCollation(
*column.description.collator,
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
else if (const ColumnNullable * column_nullable = checkAndGetColumn<ColumnNullable>(column.column))
column_nullable->updatePermutationWithCollation(
*column.description.collator,
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn<ColumnLowCardinality>(column.column))
column_low_cardinality->updatePermutationWithCollation(
*column.description.collator,
column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
column.column->updatePermutationWithCollation(
*column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges);
}
else
{

View File

@ -26,3 +26,39 @@ Order by tuple with collate
2 А
2 я
2 Я
Order by without collate
1 Ё
2 А
2 Я
1 а
2 я
1 ё
1 \N
2 \N
Order by with collate
1 а
2 А
1 ё
1 Ё
2 я
2 Я
1 \N
2 \N
Order by tuple without collate
1 Ё
1 а
1 ё
1 \N
2 А
2 Я
2 я
2 \N
Order by tuple with collate
1 а
1 ё
1 Ё
1 \N
2 А
2 я
2 Я
2 \N

View File

@ -1,8 +1,12 @@
DROP TABLE IF EXISTS test_collate;
DROP TABLE IF EXISTS test_collate_null;
CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory();
CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory();
INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я');
INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null);
SELECT 'Order by without collate';
SELECT * FROM test_collate ORDER BY s;
@ -14,5 +18,16 @@ SELECT * FROM test_collate ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru';
DROP TABLE test_collate;
SELECT 'Order by without collate';
SELECT * FROM test_collate_null ORDER BY s;
SELECT 'Order by with collate';
SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru';
SELECT 'Order by tuple without collate';
SELECT * FROM test_collate_null ORDER BY x, s;
SELECT 'Order by tuple with collate';
SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru';
DROP TABLE test_collate;
DROP TABLE test_collate_null;

View File

@ -0,0 +1,50 @@
1 ['а']
2 ['А']
1 ['ё']
1 ['ё','а']
2 ['ё','а','а']
1 ['ё','я']
1 ['Ё']
2 ['я','а']
2 ['Я']
1 ['а']
1 ['ё']
1 ['ё','а']
1 ['ё','я']
1 ['Ё']
2 ['А']
2 ['ё','а','а']
2 ['я','а']
2 ['Я']
1 ['а']
2 ['А']
1 ['ё']
1 ['ё','а']
2 ['ё','а','а',NULL]
1 ['ё',NULL,'я']
1 ['Ё']
2 ['я']
2 [NULL,'Я']
1 ['а']
1 ['ё']
1 ['ё','а']
1 ['ё',NULL,'я']
1 ['Ё']
2 ['А']
2 ['ё','а','а',NULL]
2 ['я']
2 [NULL,'Я']
2 [['а','а'],['я','ё']]
1 [['а','Ё'],['ё','я']]
1 [['а','я'],['а','ё']]
2 [['ё']]
1 [['а','Ё'],['ё','я']]
1 [['а','я'],['а','ё']]
2 [['а','а'],['я','ё']]
2 [['ё']]

View File

@ -0,0 +1,34 @@
-- Checks ORDER BY ... COLLATE 'ru' on Array columns:
--   collate_test1: Array(String)
--   collate_test2: Array(LowCardinality(Nullable(String))), including NULL elements
--   collate_test3: Array(Array(String))
DROP TABLE IF EXISTS collate_test1;
DROP TABLE IF EXISTS collate_test2;
DROP TABLE IF EXISTS collate_test3;
CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory();
CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory();
CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory();
-- Rows mix upper/lower-case Cyrillic letters and arrays of different lengths.
INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 'я']), (2, ['ё', 'а', 'а']);
INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]);
INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]);
-- For each table: first sort by the collated array alone, then by x followed by the collated array.
-- SELECT '' prints an empty line that separates the result sets.
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
SELECT '';
-- Clean up.
DROP TABLE collate_test1;
DROP TABLE collate_test2;
DROP TABLE collate_test3;

View File

@ -0,0 +1,60 @@
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
2 (1,'я')
1 (2,'а')
2 (2,'А')
2 (2,'Я')
1 (3,'я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
1 (2,'а')
1 (3,'я')
2 (1,'я')
2 (2,'А')
2 (2,'Я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
2 (1,'я')
1 (1,NULL)
2 (2,'А')
2 (2,'Я')
1 (2,NULL)
2 (2,NULL)
1 (3,'я')
1 (1,'а')
1 (1,'ё')
1 (1,'Ё')
1 (1,NULL)
1 (2,NULL)
1 (3,'я')
2 (1,'я')
2 (2,'А')
2 (2,'Я')
2 (2,NULL)
2 (1,(1,['А']))
2 (1,(1,['ё','а','а']))
1 (1,(1,['Ё']))
2 (1,(1,['Я']))
1 (1,(2,['а']))
1 (1,(2,['ё','я']))
1 (2,(1,['ё']))
1 (2,(1,['ё','а']))
2 (2,(1,['я']))
1 (1,(1,['Ё']))
1 (1,(2,['а']))
1 (1,(2,['ё','я']))
1 (2,(1,['ё']))
1 (2,(1,['ё','а']))
2 (1,(1,['А']))
2 (1,(1,['ё','а','а']))
2 (1,(1,['Я']))
2 (2,(1,['я']))

View File

@ -0,0 +1,34 @@
-- Checks ORDER BY ... COLLATE 'ru' on Tuple columns:
--   collate_test1: Tuple(UInt32, String)
--   collate_test2: Tuple(UInt32, LowCardinality(Nullable(String))), including NULL elements
--   collate_test3: Tuple(UInt32, Tuple(UInt32, Array(String)))
DROP TABLE IF EXISTS collate_test1;
DROP TABLE IF EXISTS collate_test2;
DROP TABLE IF EXISTS collate_test3;
CREATE TABLE collate_test1 (x UInt32, s Tuple(UInt32, String)) ENGINE=Memory();
CREATE TABLE collate_test2 (x UInt32, s Tuple(UInt32, LowCardinality(Nullable(String)))) ENGINE=Memory();
CREATE TABLE collate_test3 (x UInt32, s Tuple(UInt32, Tuple(UInt32, Array(String)))) ENGINE=Memory();
-- Each tuple combines a numeric element with a string-based element holding upper/lower-case Cyrillic letters.
INSERT INTO collate_test1 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2,'а')), (1, (3, 'я'));
INSERT INTO collate_test2 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2, null)), (1, (3, 'я')), (1, (1, null)), (2, (2, null));
INSERT INTO collate_test3 VALUES (1, (1, (1, ['Ё']))), (1, (2, (1, ['ё']))), (1, (1, (2, ['а']))), (2, (1, (1, ['А']))), (2, (2, (1, ['я']))), (2, (1, (1, ['Я']))), (1, (2, (1, ['ё','а']))), (1, (1, (2, ['ё', 'я']))), (2, (1, (1, ['ё', 'а', 'а'])));
-- For each table: first sort by the collated tuple alone, then by x followed by the collated tuple.
-- SELECT '' prints an empty line that separates the result sets.
SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru';
SELECT '';
SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru';
SELECT '';
-- Clean up.
DROP TABLE collate_test1;
DROP TABLE collate_test2;
DROP TABLE collate_test3;