generalize code in IColumn::permute

This commit is contained in:
Anton Popov 2021-09-29 20:51:58 +03:00
parent 00d8eee34e
commit 0dfe96fc02
11 changed files with 40 additions and 177 deletions

View File

@ -316,23 +316,7 @@ void ColumnAggregateFunction::expand(const Filter & mask, bool inverted)
/// Builds a permuted copy of this column by delegating to the shared permuteImpl helper.
///
/// permuteImpl resolves `limit` via getLimitForPermutation (0 means "all rows",
/// any other value is capped at size()), throws SIZES_OF_COLUMNS_DOESNT_MATCH when
/// `perm` has fewer entries than the resolved limit, and then forwards to
/// indexImpl(perm, limit), which performs the actual row selection.
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const
{
    return permuteImpl(*this, perm, limit);
}
ColumnPtr ColumnAggregateFunction::index(const IColumn & indexes, size_t limit) const
@ -343,6 +327,7 @@ ColumnPtr ColumnAggregateFunction::index(const IColumn & indexes, size_t limit)
template <typename Type>
ColumnPtr ColumnAggregateFunction::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
assert(limit <= indexes.size());
auto res = createView();
res->data.resize(limit);

View File

@ -762,39 +762,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
/// Builds a permuted copy of this array column by delegating to the shared permuteImpl helper.
///
/// permuteImpl resolves `limit` via getLimitForPermutation (0 means "all rows",
/// any other value is capped at size()), throws SIZES_OF_COLUMNS_DOESNT_MATCH when
/// `perm` has fewer entries than the resolved limit, and then forwards to
/// indexImpl(perm, limit). Offsets and nested data are rebuilt there, so the
/// empty-result case is handled in a single place.
ColumnPtr ColumnArray::permute(const Permutation & perm, size_t limit) const
{
    return permuteImpl(*this, perm, limit);
}
ColumnPtr ColumnArray::index(const IColumn & indexes, size_t limit) const
@ -805,8 +773,9 @@ ColumnPtr ColumnArray::index(const IColumn & indexes, size_t limit) const
template <typename T>
ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit) const
{
assert(limit <= indexes.size());
if (limit == 0)
return ColumnArray::create(data);
return ColumnArray::create(data->cloneEmpty());
/// Convert indexes to UInt64 in case of overflow.
auto nested_indexes_column = ColumnUInt64::create();

View File

@ -93,15 +93,7 @@ ColumnPtr ColumnConst::replicate(const Offsets & offsets) const
/// Returns a constant column over the same `data` with the row count resolved from `limit`.
///
/// The contents of `perm` are never read here; only its size is validated by
/// getLimitForPermutation, which treats `limit == 0` as "all rows", caps any other
/// value at size(), and throws SIZES_OF_COLUMNS_DOESNT_MATCH when `perm` is shorter
/// than the resolved limit.
ColumnPtr ColumnConst::permute(const Permutation & perm, size_t limit) const
{
    limit = getLimitForPermutation(*this, perm, limit);
    return ColumnConst::create(data, limit);
}

View File

@ -231,17 +231,7 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
/// Builds a permuted copy of this decimal column by delegating to the shared permuteImpl helper.
///
/// permuteImpl resolves `limit` via getLimitForPermutation (0 means "all rows",
/// any other value is capped at size()), throws SIZES_OF_COLUMNS_DOESNT_MATCH when
/// `perm` has fewer entries than the resolved limit, and then forwards to
/// indexImpl(perm, limit), which creates the result with this column's `scale`.
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t limit) const
{
    return permuteImpl(*this, perm, limit);
}
template <is_decimal T>

View File

@ -219,12 +219,7 @@ template <is_decimal T>
template <typename Type>
ColumnPtr ColumnDecimal<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
size_t size = indexes.size();
if (limit == 0)
limit = size;
else
limit = std::min(size, limit);
assert(limit <= indexes.size());
auto res = this->create(limit, scale);
typename Self::Container & res_data = res->getData();

View File

@ -372,30 +372,7 @@ void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverted)
/// Builds a permuted copy of this fixed-string column by delegating to the shared permuteImpl helper.
///
/// permuteImpl resolves `limit` via getLimitForPermutation (0 means "all rows",
/// any other value is capped at size()), throws SIZES_OF_COLUMNS_DOESNT_MATCH when
/// `perm` has fewer entries than the resolved limit, and then forwards to
/// indexImpl(perm, limit), which also handles the empty-result case.
ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const
{
    return permuteImpl(*this, perm, limit);
}
@ -408,6 +385,7 @@ ColumnPtr ColumnFixedString::index(const IColumn & indexes, size_t limit) const
template <typename Type>
ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
assert(limit <= indexes.size());
if (limit == 0)
return ColumnFixedString::create(n);

View File

@ -93,14 +93,7 @@ void ColumnFunction::expand(const Filter & mask, bool inverted)
ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const
{
if (limit == 0)
limit = size_;
else
limit = std::min(size_, limit);
if (perm.size() < limit)
throw Exception("Size of permutation (" + toString(perm.size()) + ") is less than required ("
+ toString(limit) + ")", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
limit = getLimitForPermutation(*this, perm, limit);
ColumnsWithTypeAndName capture = captured_columns;
for (auto & column : capture)

View File

@ -208,51 +208,7 @@ void ColumnString::expand(const IColumn::Filter & mask, bool inverted)
/// Builds a permuted copy of this string column by delegating to the shared permuteImpl helper.
///
/// permuteImpl resolves `limit` via getLimitForPermutation (0 means "all rows",
/// any other value is capped at size()), throws SIZES_OF_COLUMNS_DOESNT_MATCH when
/// `perm` has fewer entries than the resolved limit, and then forwards to
/// indexImpl(perm, limit), which also handles the empty-result case.
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{
    return permuteImpl(*this, perm, limit);
}
@ -300,6 +256,7 @@ ColumnPtr ColumnString::index(const IColumn & indexes, size_t limit) const
template <typename Type>
ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
assert(limit <= indexes.size());
if (limit == 0)
return ColumnString::create();

View File

@ -443,22 +443,7 @@ void ColumnVector<T>::applyZeroMap(const IColumn::Filter & filt, bool inverted)
/// Builds a permuted copy of this numeric column by delegating to the shared permuteImpl helper.
///
/// permuteImpl resolves `limit` via getLimitForPermutation (0 means "all rows",
/// any other value is capped at size()), throws SIZES_OF_COLUMNS_DOESNT_MATCH when
/// `perm` has fewer entries than the resolved limit, and then forwards to
/// indexImpl(perm, limit), which performs the actual element selection.
template <typename T>
ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, size_t limit) const
{
    return permuteImpl(*this, perm, limit);
}
template <typename T>

View File

@ -360,12 +360,7 @@ template <typename T>
template <typename Type>
ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
size_t size = indexes.size();
if (limit == 0)
limit = size;
else
limit = std::min(size, limit);
assert(limit <= indexes.size());
auto res = this->create(limit);
typename Self::Container & res_data = res->getData();

View File

@ -1,6 +1,7 @@
#pragma once
#include <Columns/IColumn.h>
#include <Common/PODArray.h>
/// Common helper methods for implementation of different columns.
@ -55,7 +56,8 @@ ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t
limit = indexes.size();
if (indexes.size() < limit)
throw Exception("Size of indexes is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH,
"Size of indexes ({}) is less than required ({})", indexes.size(), limit);
if (auto * data_uint8 = detail::getIndexesData<UInt8>(indexes))
return column.template indexImpl<UInt8>(*data_uint8, limit);
@ -70,6 +72,28 @@ ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t
ErrorCodes::LOGICAL_ERROR);
}
/// Resolves how many rows a permutation of `column` should produce.
///
/// A `limit` of 0 selects every row of the column; any other value is capped
/// at column.size(). Throws SIZES_OF_COLUMNS_DOESNT_MATCH if `perm` contains
/// fewer entries than the resolved row count. Returns the resolved row count.
template <typename Column>
size_t getLimitForPermutation(const Column & column, const IColumn::Permutation & perm, size_t limit)
{
    const size_t rows = column.size();
    const size_t effective_limit = (limit == 0) ? rows : std::min(rows, limit);

    if (perm.size() < effective_limit)
        throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH,
            "Size of permutation ({}) is less than required ({})", perm.size(), effective_limit);

    return effective_limit;
}
/// Generic permute: validates and resolves `limit` with getLimitForPermutation,
/// then hands the permutation to the column's own indexImpl as an index array.
/// Any exception raised by the limit check propagates before indexImpl is called.
template <typename Column>
ColumnPtr permuteImpl(const Column & column, const IColumn::Permutation & perm, size_t limit)
{
    return column.indexImpl(perm, getLimitForPermutation(column, perm, limit));
}
#define INSTANTIATE_INDEX_IMPL(Column) \
template ColumnPtr Column::indexImpl<UInt8>(const PaddedPODArray<UInt8> & indexes, size_t limit) const; \
template ColumnPtr Column::indexImpl<UInt16>(const PaddedPODArray<UInt16> & indexes, size_t limit) const; \