Add reverse parameter to filter, fix tests, reduce copying

This commit is contained in:
Pavel Kruglov 2021-04-22 23:05:50 +03:00 committed by Pavel Kruglov
parent 775d190fb3
commit ae8df9c1c5
35 changed files with 147 additions and 139 deletions

View File

@ -282,7 +282,7 @@ void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start
}
ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_size_hint) const
ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_size_hint, bool reverse) const
{
size_t size = data.size();
if (size != filter.size())
@ -298,7 +298,7 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
for (size_t i = 0; i < size; ++i)
if (filter[i])
/// Normalize the filter byte to 0/1 before XOR: any non-zero byte means "keep",
/// so XOR-ing the raw byte (e.g. 2) with `reverse` would not invert it.
if (reverse ^ !!filter[i])
res_data.push_back(data[i]);
/// To save RAM in case of too strong filtering.

View File

@ -175,7 +175,7 @@ public:
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -533,26 +533,26 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
}
/// Dispatches filtering to a specialization chosen by the concrete type of the nested `data` column:
/// numeric ColumnVector types -> filterNumber<T>, ColumnString -> filterString, ColumnTuple -> filterTuple,
/// ColumnNullable -> filterNullable, and everything else -> filterGeneric.
/// `filt`, `result_size_hint` and `reverse` are forwarded unchanged to the chosen specialization.
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
if (typeid_cast<const ColumnUInt8 *>(data.get())) return filterNumber<UInt8>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return filterNumber<UInt16>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return filterNumber<UInt32>(filt, result_size_hint);
if (typeid_cast<const ColumnUInt64 *>(data.get())) return filterNumber<UInt64>(filt, result_size_hint);
if (typeid_cast<const ColumnInt8 *>(data.get())) return filterNumber<Int8>(filt, result_size_hint);
if (typeid_cast<const ColumnInt16 *>(data.get())) return filterNumber<Int16>(filt, result_size_hint);
if (typeid_cast<const ColumnInt32 *>(data.get())) return filterNumber<Int32>(filt, result_size_hint);
if (typeid_cast<const ColumnInt64 *>(data.get())) return filterNumber<Int64>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint);
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint);
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint);
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint);
return filterGeneric(filt, result_size_hint);
if (typeid_cast<const ColumnUInt8 *>(data.get())) return filterNumber<UInt8>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return filterNumber<UInt16>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return filterNumber<UInt32>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt64 *>(data.get())) return filterNumber<UInt64>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt8 *>(data.get())) return filterNumber<Int8>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt16 *>(data.get())) return filterNumber<Int16>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt32 *>(data.get())) return filterNumber<Int32>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt64 *>(data.get())) return filterNumber<Int64>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint, reverse);
return filterGeneric(filt, result_size_hint, reverse);
}
template <typename T>
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
if (getOffsets().empty())
return ColumnArray::create(data);
@ -562,11 +562,11 @@ ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hin
auto & res_elems = assert_cast<ColumnVector<T> &>(res->getData()).getData();
Offsets & res_offsets = res->getOffsets();
filterArraysImpl<T>(assert_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint);
filterArraysImpl<T>(assert_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint, reverse);
return res;
}
ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
size_t col_size = getOffsets().size();
if (col_size != filt.size())
@ -604,7 +604,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
/// Number of rows in the array.
size_t array_size = src_offsets[i] - prev_src_offset;
if (filt[i])
/// Normalize the filter byte to 0/1 before XOR: any non-zero byte means "keep",
/// so XOR-ing the raw byte (e.g. 2) with `reverse` would not invert it.
if (reverse ^ !!filt[i])
{
/// If the array is not empty - copy content.
if (array_size)
@ -634,7 +634,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
return res;
}
ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
size_t size = getOffsets().size();
if (size != filt.size())
@ -646,7 +646,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
Filter nested_filt(getOffsets().back());
for (size_t i = 0; i < size; ++i)
{
if (filt[i])
/// Normalize the filter byte to 0/1 before XOR: any non-zero byte means "keep",
/// so XOR-ing the raw byte (e.g. 2) with `reverse` would not invert it.
if (reverse ^ !!filt[i])
memset(&nested_filt[offsetAt(i)], 1, sizeAt(i));
else
memset(&nested_filt[offsetAt(i)], 0, sizeAt(i));
@ -669,7 +669,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
size_t current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
if (filt[i])
/// Must use the same 0/1-normalized condition as the nested-filter loop above,
/// otherwise result offsets and filtered nested data could disagree for filter bytes > 1.
if (reverse ^ !!filt[i])
{
current_offset += sizeAt(i);
res_offsets.push_back(current_offset);
@ -679,7 +679,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
return res;
}
ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
if (getOffsets().empty())
return ColumnArray::create(data);
@ -687,13 +687,13 @@ ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_h
const ColumnNullable & nullable_elems = assert_cast<const ColumnNullable &>(*data);
auto array_of_nested = ColumnArray::create(nullable_elems.getNestedColumnPtr(), offsets);
auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint);
auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint, reverse);
const auto & filtered_array_of_nested = assert_cast<const ColumnArray &>(*filtered_array_of_nested_owner);
const auto & filtered_offsets = filtered_array_of_nested.getOffsetsPtr();
auto res_null_map = ColumnUInt8::create();
filterArraysImplOnlyData(nullable_elems.getNullMapData(), getOffsets(), res_null_map->getData(), filt, result_size_hint);
filterArraysImplOnlyData(nullable_elems.getNullMapData(), getOffsets(), res_null_map->getData(), filt, result_size_hint, reverse);
return ColumnArray::create(
ColumnNullable::create(
@ -702,7 +702,7 @@ ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_h
filtered_offsets);
}
ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
if (getOffsets().empty())
return ColumnArray::create(data);
@ -719,7 +719,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
Columns temporary_arrays(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
temporary_arrays[i] = ColumnArray(tuple.getColumns()[i]->assumeMutable(), getOffsetsPtr()->assumeMutable())
.filter(filt, result_size_hint);
.filter(filt, result_size_hint, reverse);
Columns tuple_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)

View File

@ -70,7 +70,7 @@ public:
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
/// Filters the array column; `reverse` (default false) is forwarded to the type-specific filter* helpers.
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
@ -173,12 +173,12 @@ private:
/// Specializations for the filter function.
template <typename T>
ColumnPtr filterNumber(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNumber(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const;
ColumnPtr filterString(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
ColumnPtr filterString(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const;
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const;
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;

View File

@ -89,7 +89,7 @@ public:
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t, bool) const override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }

View File

@ -53,13 +53,16 @@ ColumnPtr ColumnConst::removeLowCardinality() const
return ColumnConst::create(data->convertToFullColumnIfLowCardinality(), s);
}
/// Filters a constant column: the stored value (`data`) is reused and only the row count changes.
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH when the filter length differs from the column size `s`.
/// The resulting size is countBytesInFilter(filt), or filt.size() minus that count when `reverse` is set.
ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/) const
ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/, bool reverse) const
{
if (s != filt.size())
throw Exception("Size of filter (" + toString(filt.size()) + ") doesn't match size of column (" + toString(s) + ")",
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
return ColumnConst::create(data, countBytesInFilter(filt));
size_t new_size = countBytesInFilter(filt);
/// With `reverse`, keep the rows the filter would have dropped.
if (reverse)
new_size = filt.size() - new_size;
return ColumnConst::create(data, new_size);
}
ColumnPtr ColumnConst::replicate(const Offsets & offsets) const

View File

@ -180,7 +180,7 @@ public:
data->updateHashFast(hash);
}
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -292,7 +292,7 @@ void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
}
template <typename T>
ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const
{
size_t size = data.size();
if (size != filt.size())
@ -310,7 +310,7 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
while (filt_pos < filt_end)
{
if (*filt_pos)
/// Normalize the filter byte to 0/1 before XOR: any non-zero byte means "keep",
/// so XOR-ing the raw byte (e.g. 2) with `reverse` would not invert it.
if (reverse ^ !!*filt_pos)
res_data.push_back(*data_pos);
++filt_pos;

View File

@ -150,7 +150,7 @@ public:
UInt64 get64(size_t n) const override;
bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -266,7 +266,7 @@ void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_
memcpy(chars.data() + old_size, &src_concrete.chars[start * n], length * n);
}
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const
{
size_t col_size = size();
if (col_size != filt.size())
@ -296,7 +296,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
while (filt_pos < filt_end_sse)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
mask = reverse ? mask : ~mask;
if (0 == mask)
{
@ -313,7 +313,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
size_t res_chars_size = res->chars.size();
for (size_t i = 0; i < SIMD_BYTES; ++i)
{
if (filt_pos[i])
/// Normalize to 0/1 so this scalar path agrees with the SIMD mask above,
/// which treats every non-zero filter byte as "keep".
if (reverse ^ !!filt_pos[i])
{
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
@ -330,7 +330,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
size_t res_chars_size = res->chars.size();
while (filt_pos < filt_end)
{
if (*filt_pos)
/// Normalize to 0/1 so the tail loop agrees with the SIMD path,
/// which treats every non-zero filter byte as "keep".
if (reverse ^ !!*filt_pos)
{
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);

View File

@ -145,7 +145,7 @@ public:
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -54,7 +54,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
return ColumnFunction::create(length, function, capture);
}
ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
if (size_ != filt.size())
throw Exception("Size of filter (" + toString(filt.size()) + ") doesn't match size of column ("
@ -62,11 +62,15 @@ ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint)
ColumnsWithTypeAndName capture = captured_columns;
for (auto & column : capture)
column.column = column.column->filter(filt, result_size_hint);
column.column = column.column->filter(filt, result_size_hint, reverse);
size_t filtered_size = 0;
if (capture.empty())
{
filtered_size = countBytesInFilter(filt);
if (reverse)
filtered_size = filt.size() - filtered_size;
}
else
filtered_size = capture.front().column->size();
@ -203,6 +207,8 @@ ColumnWithTypeAndName ColumnFunction::reduce(bool reduce_arguments) const
columns.reserve(captured_columns.size());
for (const auto & col : captured_columns)
{
LOG_DEBUG(&Poco::Logger::get("ColumnFunction"), "Arg type: {}", col.type->getName());
if (const auto * column_function = typeid_cast<const ColumnFunction *>(col.column.get()))
columns.push_back(column_function->reduce(true));
else

View File

@ -37,7 +37,7 @@ public:
ColumnPtr cut(size_t start, size_t length) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -105,9 +105,9 @@ public:
void updateHashFast(SipHash &) const override;
/// Filters only the indexes (forwarding `result_size_hint` and `reverse`) and builds a new
/// ColumnLowCardinality that reuses the same dictionary pointer.
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override
{
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint, reverse));
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override

View File

@ -143,9 +143,9 @@ void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length
start, length);
}
/// Filters the `nested` column with the same arguments and wraps the result in a new ColumnMap.
ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
auto filtered = nested->filter(filt, result_size_hint);
auto filtered = nested->filter(filt, result_size_hint, reverse);
return ColumnMap::create(filtered);
}

View File

@ -63,7 +63,7 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;

View File

@ -214,10 +214,10 @@ void ColumnNullable::popBack(size_t n)
getNullMapColumn().popBack(n);
}
/// Applies the same filter (and `reverse` flag) to both the nested column and the null map column,
/// then builds a new ColumnNullable from the two filtered parts.
ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint);
ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint);
ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint, reverse);
ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint, reverse);
return ColumnNullable::create(filtered_data, filtered_null_map);
}

View File

@ -87,7 +87,7 @@ public:
}
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;

View File

@ -143,7 +143,7 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len
}
ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
if (offsets.empty())
return ColumnString::create();
@ -153,7 +153,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
Chars & res_chars = res->chars;
Offsets & res_offsets = res->offsets;
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint, reverse);
return res;
}

View File

@ -210,7 +210,7 @@ public:
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -221,13 +221,13 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng
start, length);
}
/// Filters every element column of the tuple with the same arguments
/// and builds a new ColumnTuple from the filtered columns.
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
{
const size_t tuple_size = columns.size();
Columns new_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
new_columns[i] = columns[i]->filter(filt, result_size_hint);
new_columns[i] = columns[i]->filter(filt, result_size_hint, reverse);
return ColumnTuple::create(new_columns);
}

View File

@ -66,7 +66,7 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;

View File

@ -344,7 +344,7 @@ void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
}
template <typename T>
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const
{
size_t size = data.size();
if (size != filt.size())
@ -374,7 +374,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end_sse)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
mask = reverse ? mask : ~mask;
if (0 == mask)
{
@ -387,7 +387,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
else
{
for (size_t i = 0; i < SIMD_BYTES; ++i)
if (filt_pos[i])
/// Normalize to 0/1 so this scalar path agrees with the SIMD mask above,
/// which treats every non-zero filter byte as "keep".
if (reverse ^ !!filt_pos[i])
res_data.push_back(data_pos[i]);
}
@ -398,7 +398,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end)
{
if (*filt_pos)
/// Normalize to 0/1 so the tail loop agrees with the SIMD path,
/// which treats every non-zero filter byte as "keep".
if (reverse ^ !!*filt_pos)
res_data.push_back(*data_pos);
++filt_pos;

View File

@ -282,7 +282,7 @@ public:
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse = false) const override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;

View File

@ -192,7 +192,7 @@ namespace
void filterArraysImplGeneric(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets * res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint)
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse)
{
const size_t size = src_offsets.size();
if (size != filt.size())
@ -239,7 +239,7 @@ namespace
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
zero_vec));
mask = ~mask;
mask = reverse ? mask : ~mask;
if (mask == 0)
{
@ -263,7 +263,7 @@ namespace
else
{
for (size_t i = 0; i < SIMD_BYTES; ++i)
if (filt_pos[i])
/// Normalize to 0/1 so this scalar path agrees with the SIMD mask above,
/// which treats every non-zero filter byte as "keep".
if (reverse ^ !!filt_pos[i])
copy_array(offsets_pos + i);
}
@ -274,7 +274,7 @@ namespace
while (filt_pos < filt_end)
{
if (*filt_pos)
/// Normalize to 0/1 so the tail loop agrees with the SIMD path,
/// which treats every non-zero filter byte as "keep".
if (reverse ^ !!*filt_pos)
copy_array(offsets_pos);
++filt_pos;
@ -288,18 +288,18 @@ template <typename T>
void filterArraysImpl(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets & res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint)
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse)
{
return filterArraysImplGeneric<T, ResultOffsetsBuilder>(src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint);
return filterArraysImplGeneric<T, ResultOffsetsBuilder>(src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint, reverse);
}
/// Thin wrapper over filterArraysImplGeneric using NoResultOffsetsBuilder:
/// passes nullptr for the result offsets and forwards all other arguments (including `reverse`).
template <typename T>
void filterArraysImplOnlyData(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems,
const IColumn::Filter & filt, ssize_t result_size_hint)
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse)
{
return filterArraysImplGeneric<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint);
return filterArraysImplGeneric<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint, reverse);
}
@ -308,11 +308,11 @@ void filterArraysImplOnlyData(
template void filterArraysImpl<TYPE>( \
const PaddedPODArray<TYPE> &, const IColumn::Offsets &, \
PaddedPODArray<TYPE> &, IColumn::Offsets &, \
const IColumn::Filter &, ssize_t); \
const IColumn::Filter &, ssize_t, bool); \
template void filterArraysImplOnlyData<TYPE>( \
const PaddedPODArray<TYPE> &, const IColumn::Offsets &, \
PaddedPODArray<TYPE> &, \
const IColumn::Filter &, ssize_t);
const IColumn::Filter &, ssize_t, bool);
INSTANTIATE(UInt8)
INSTANTIATE(UInt16)

View File

@ -32,14 +32,14 @@ template <typename T>
void filterArraysImpl(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets & res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint);
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse = false);
/// Same as above, but not fills res_offsets.
template <typename T>
void filterArraysImplOnlyData(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems,
const IColumn::Filter & filt, ssize_t result_size_hint);
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse = false);
namespace detail
{
@ -66,7 +66,7 @@ ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t
else if (auto * data_uint64 = detail::getIndexesData<UInt64>(indexes))
return column.template indexImpl<UInt64>(*data_uint64, limit);
else
throw Exception("Indexes column for IColumn::select must be ColumnUInt, got " + indexes.getName(),
/// Keep the trailing space after "got" so the column name is not glued to the message text.
throw Exception("Indexes column for IColumn::select must be ColumnUInt, got " + indexes.getName(),
ErrorCodes::LOGICAL_ERROR);
}

View File

@ -234,7 +234,7 @@ public:
* otherwise (i.e. < 0), makes reserve() using size of source column.
*/
using Filter = PaddedPODArray<UInt8>;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const = 0;
/// Permutes elements using specified permutation. Is used in sorting.
/// limit - if it isn't 0, puts only first limit elements in the result.

View File

@ -98,9 +98,12 @@ public:
s += length;
}
/// Returns a dummy clone sized to the number of selected rows:
/// countBytesInFilter(filt), or filt.size() minus that count when `reverse` is set.
/// `result_size_hint` is unused.
ColumnPtr filter(const Filter & filt, ssize_t /*result_size_hint*/) const override
ColumnPtr filter(const Filter & filt, ssize_t /*result_size_hint*/, bool reverse = false) const override
{
return cloneDummy(countBytesInFilter(filt));
size_t bytes = countBytesInFilter(filt);
/// With `reverse`, count the rows the filter would have dropped instead.
if (reverse)
bytes = filt.size() - bytes;
return cloneDummy(bytes);
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override

View File

@ -134,7 +134,7 @@ public:
throw Exception("Method cut is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
/// Filtering is not supported for ColumnUnique: always throws NOT_IMPLEMENTED, all arguments are ignored.
ColumnPtr filter(const IColumn::Filter &, ssize_t) const override
ColumnPtr filter(const IColumn::Filter &, ssize_t, bool) const override
{
throw Exception("Method filter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}

View File

@ -5,6 +5,7 @@
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnLowCardinality.h>
#include <common/logger_useful.h>
namespace DB
{
@ -15,14 +16,14 @@ extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
ColumnPtr expandColumnByMask(const ColumnPtr & column, const PaddedPODArray<UInt8>& mask, Field * field)
ColumnPtr expandColumnByMask(const ColumnPtr & column, const PaddedPODArray<UInt8>& mask, Field * field, bool reverse)
{
MutableColumnPtr res = column->cloneEmpty();
res->reserve(mask.size());
size_t index = 0;
for (size_t i = 0; i != mask.size(); ++i)
{
if (mask[i])
/// Normalize the mask byte to 0/1 before XOR: any non-zero byte counts as set,
/// so XOR-ing the raw byte (e.g. 2) with `reverse` would not invert it.
if (reverse ^ !!mask[i])
{
if (index >= column->size())
throw Exception("Too many bits in mask", ErrorCodes::LOGICAL_ERROR);
@ -43,19 +44,18 @@ ColumnPtr expandColumnByMask(const ColumnPtr & column, const PaddedPODArray<UInt
}
/// Builds a byte mask from `mask` into `res`, one output byte per input element.
/// `res` is resized to mask.size() when its size differs.
/// Rows flagged in `null_bytemap` (when it is non-null) get `null_value`, or `!null_value` when `reverse` is set.
/// All other rows get `!!mask[i]`, or `!mask[i]` when `reverse` is set.
/// NOTE(review): `null_bytemap` is indexed with the same `i` as `mask`; presumably both have equal length - confirm at call sites.
template <typename ValueType>
PaddedPODArray<UInt8> copyMaskImpl(const PaddedPODArray<ValueType>& mask, bool reverse, const PaddedPODArray<UInt8> * null_bytemap, UInt8 null_value)
void copyMaskImpl(const PaddedPODArray<ValueType>& mask, PaddedPODArray<UInt8> & res, bool reverse, const PaddedPODArray<UInt8> * null_bytemap, UInt8 null_value)
{
PaddedPODArray<UInt8> res;
res.reserve(mask.size());
if (res.size() != mask.size())
res.resize(mask.size());
for (size_t i = 0; i != mask.size(); ++i)
{
if (null_bytemap && (*null_bytemap)[i])
res.push_back(reverse ? !null_value : null_value);
res[i] = reverse ? !null_value : null_value;
else
res.push_back(reverse ? !mask[i]: !!mask[i]);
res[i] = reverse ? !mask[i]: !!mask[i];
}
return res;
}
template <typename ValueType>
@ -63,36 +63,35 @@ bool tryGetMaskFromColumn(const ColumnPtr column, PaddedPODArray<UInt8> & res, b
{
if (const auto * col = checkAndGetColumn<ColumnVector<ValueType>>(*column))
{
res = copyMaskImpl(col->getData(), reverse, null_bytemap, null_value);
copyMaskImpl(col->getData(), res, reverse, null_bytemap, null_value);
return true;
}
return false;
}
PaddedPODArray<UInt8> reverseMask(const PaddedPODArray<UInt8> & mask)
{
return copyMaskImpl(mask, true, nullptr, 1);
}
/// Builds a 0/1 UInt8 mask from `column`, dispatching on the concrete column kind.
/// NOTE(review): this text is a rendered diff; where two near-identical statements are adjacent,
/// the first appears to be the old value-returning form and the second the new form that fills `res`.
///
///  - ColumnConst and ColumnLowCardinality are converted with convertToFullColumn() and re-dispatched.
///  - ColumnNothing yields col->size() elements, all `null_value` (or `!null_value` under `reverse`).
///  - ColumnNullable re-dispatches on the nested column, passing its own null map.
///  - Otherwise each numeric ColumnVector type is tried in turn via tryGetMaskFromColumn.
/// Throws Exception with ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT if no branch matches.
PaddedPODArray<UInt8> getMaskFromColumn(const ColumnPtr & column, bool reverse, const PaddedPODArray<UInt8> * null_bytemap, UInt8 null_value)
void getMaskFromColumn(const ColumnPtr & column, PaddedPODArray<UInt8> & res, bool reverse, const PaddedPODArray<UInt8> * null_bytemap, UInt8 null_value)
{
if (const auto * col = checkAndGetColumn<ColumnConst>(*column))
return getMaskFromColumn(col->convertToFullColumn(), reverse, null_bytemap, null_value);
{
getMaskFromColumn(col->convertToFullColumn(), res, reverse, null_bytemap, null_value);
return;
}
if (const auto * col = checkAndGetColumn<ColumnNothing>(*column))
return PaddedPODArray<UInt8>(col->size(), reverse ? !null_value : null_value);
{
res.resize_fill(col->size(), reverse ? !null_value : null_value);
return;
}
if (const auto * col = checkAndGetColumn<ColumnNullable>(*column))
{
/// The nullable column's own null map is passed down; the incoming `null_bytemap` is not forwarded on this path.
const PaddedPODArray<UInt8> & null_map = checkAndGetColumn<ColumnUInt8>(*col->getNullMapColumnPtr())->getData();
return getMaskFromColumn(col->getNestedColumnPtr(), reverse, &null_map, null_value);
/// New form: `return <void expression>;` inside a void function.
return getMaskFromColumn(col->getNestedColumnPtr(), res, reverse, &null_map, null_value);
}
if (const auto * col = checkAndGetColumn<ColumnLowCardinality>(*column))
return getMaskFromColumn(col->convertToFullColumn(), reverse, null_bytemap, null_value);
/// Old form only: local result buffer (the new form receives `res` as a parameter).
PaddedPODArray<UInt8> res;
return getMaskFromColumn(col->convertToFullColumn(), res, reverse, null_bytemap, null_value);
/// Try each numeric element type; the first matching tryGetMaskFromColumn fills `res` and stops the && chain.
/// The hunk header below hides the attempts between Int16 and Float32.
if (!tryGetMaskFromColumn<Int8>(column, res, reverse, null_bytemap, null_value) &&
!tryGetMaskFromColumn<Int16>(column, res, reverse, null_bytemap, null_value) &&
@ -105,8 +104,6 @@ PaddedPODArray<UInt8> getMaskFromColumn(const ColumnPtr & column, bool reverse,
!tryGetMaskFromColumn<Float32>(column, res, reverse, null_bytemap, null_value) &&
!tryGetMaskFromColumn<Float64>(column, res, reverse, null_bytemap, null_value))
throw Exception("Cannot convert column " + column.get()->getName() + " to mask", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
/// Old form only: the new form returns void.
return res;
}
template <typename Op>
@ -129,15 +126,15 @@ void disjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8>
binaryMasksOperationImpl(mask1, mask2, [](const auto & lhs, const auto & rhs){ return lhs | rhs; });
}
/// Executes a ColumnFunction held in `column` on the rows selected by `mask`, then replaces
/// `column` with the result. Does nothing if column.column is not a ColumnFunction.
/// NOTE(review): this text is a rendered diff; in each adjacent pair the first line appears to be
/// the old form (no `reverse`) and the second the new form, which forwards `reverse` to both
/// filter() and expandColumnByMask() so the caller does not need a separately inverted mask.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8>& mask, Field * default_value)
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, bool reverse, Field * default_value)
{
const auto * column_function = checkAndGetColumn<ColumnFunction>(*column.column);
if (!column_function)
return;
/// Keep only the rows selected by the mask; -1 is passed as result_size_hint.
auto filtered = column_function->filter(mask, -1);
auto filtered = column_function->filter(mask, -1, reverse);
/// reduce(true) presumably evaluates the deferred function on the filtered rows — TODO confirm against ColumnFunction.
auto result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce(true);
/// Expand the result column back by the same mask; `default_value` is forwarded to expandColumnByMask
/// (presumably used for the rows that were filtered out — confirm).
result.column = expandColumnByMask(result.column, mask, default_value);
result.column = expandColumnByMask(result.column, mask, default_value, reverse);
column = std::move(result);
}

View File

@ -7,15 +7,13 @@
namespace DB
{
/// NOTE(review): this text is a rendered diff, so old and new declarations of the same function appear side by side.

/// Old declaration: returns the mask by value.
PaddedPODArray<UInt8> getMaskFromColumn(const ColumnPtr & column, bool reverse = false, const PaddedPODArray<UInt8> * null_bytemap = nullptr, UInt8 null_value = 1);
/// Old helper returning a mask by value; presumably an inverted copy, judging by the name — confirm against the definition.
PaddedPODArray<UInt8> reverseMask(const PaddedPODArray<UInt8> & mask);
/// New declaration: fills the caller-supplied `mask`. Defaults: no reversal, no null map, null_value = 1.
void getMaskFromColumn(const ColumnPtr & column, PaddedPODArray<UInt8> & mask, bool reverse = false, const PaddedPODArray<UInt8> * null_bytemap = nullptr, UInt8 null_value = 1);
/// In-place binary mask operations: `mask1` is the non-const argument and `mask2` is read-only.
/// Presumably element-wise AND / OR respectively — confirm against the definitions.
void conjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2);
void disjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2);
/// Old declaration, without `reverse`.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, Field * default_value = nullptr);
/// New declaration: `reverse` is inserted before `default_value`, so positional callers that
/// passed a default value as the third argument must be updated.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, bool reverse = false, Field * default_value = nullptr);
/// Presumably executes `column` if it still holds a deferred function — definition not visible here; confirm.
void executeColumnIfNeeded(ColumnWithTypeAndName & column);

View File

@ -519,14 +519,14 @@ void FunctionAnyArityLogical<Impl, Name>::executeShortCircuitArguments(ColumnsWi
/// NOTE(review): this is the interior of FunctionAnyArityLogical::executeShortCircuitArguments as
/// shown by a rendered diff. The function header and the declaration of `default_value` are outside
/// the visible hunk, and old/new lines are interleaved.
///
/// For the And function the mask is taken as-is; for any other name it is reversed.
bool reverse = Name::name != NameAnd::name;
/// Null rows map to 1 for And and to 0 otherwise (before `reverse` is applied by getMaskFromColumn).
UInt8 null_value = Name::name == NameAnd::name ? 1 : 0;
executeColumnIfNeeded(arguments[0]);
/// New form: one mask buffer, computed from the first argument and then refreshed after every argument.
IColumn::Filter mask;
getMaskFromColumn(arguments[0].column, mask, reverse, nullptr, null_value);
for (size_t i = 1; i < arguments.size(); ++i)
{
if (isColumnFunction(*arguments[i].column))
{
/// Old form: allocate a fresh mask from the previous argument on each deferred argument.
IColumn::Filter mask = getMaskFromColumn(arguments[i - 1].column, reverse, nullptr, null_value);
maskedExecute(arguments[i], mask, &default_value);
}
/// New form: `reverse` was already applied when the mask was built, so false is passed here.
maskedExecute(arguments[i], mask, false, &default_value);
/// New form: rebuild the mask from the argument just processed, for use by the next iteration.
getMaskFromColumn(arguments[i].column, mask, reverse, nullptr, null_value);
}
}

View File

@ -925,9 +925,10 @@ public:
/// NOTE(review): interior of a three-argument executeShortCircuitArguments as shown by a rendered
/// diff; the function header is outside the visible hunk and old/new lines are interleaved.
executeColumnIfNeeded(arguments[0]);
/// Masked execution is only needed when at least one of arguments[1] / arguments[2] is still a deferred function column.
if (isColumnFunction(*arguments[1].column) || isColumnFunction(*arguments[2].column))
{
/// Old form: mask returned by value.
IColumn::Filter mask = getMaskFromColumn(arguments[0].column);
/// New form: mask filled in place from arguments[0].
IColumn::Filter mask;
getMaskFromColumn(arguments[0].column, mask);
/// arguments[1] is executed with the mask as-is.
maskedExecute(arguments[1], mask);
/// arguments[2] is executed on the inverted selection. The old form materialised an inverted copy
/// with reverseMask(); the new form passes reverse = true instead.
maskedExecute(arguments[2], reverseMask(mask));
maskedExecute(arguments[2], mask, /*reverse=*/true);
}
}

View File

@ -111,22 +111,23 @@ public:
/// Short-circuit evaluation over `arguments`, walked two at a time starting from index 1.
/// NOTE(review): this text is a rendered diff, so old and new statements are interleaved.
/// New form, per iteration:
///   1. build `current_mask` from arguments[i - 1] and OR it into `mask_disjunctions`;
///   2. if arguments[i] is a deferred function column, execute it under `current_mask`;
///   3. advance, then if the next argument is a deferred function column, execute it under the
///      reversed `mask_disjunctions`, passing `default_value` to maskedExecute.
/// NOTE(review): arguments[i] is read after the first ++i without a bounds check — this assumes
/// an odd argument count (cond/then pairs plus a trailing else); confirm.
void executeShortCircuitArguments(ColumnsWithTypeAndName & arguments) const override
{
executeColumnIfNeeded(arguments[0]);
/// Old form: running mask seeded from the first argument.
IColumn::Filter mask = getMaskFromColumn(arguments[0].column);
/// New form: a reusable per-iteration mask, plus an accumulator that starts all-zero with one
/// element per row of arguments[0].
IColumn::Filter current_mask;
IColumn::Filter mask_disjunctions = IColumn::Filter(arguments[0].column->size(), 0);
Field default_value = 0;
size_t i = 1;
while (i < arguments.size())
{
/// New form: refresh the mask from the preceding argument and fold it into the accumulator.
getMaskFromColumn(arguments[i - 1].column, current_mask);
disjunctionMasks(mask_disjunctions, current_mask);
if (isColumnFunction(*arguments[i].column))
{
/// Old form: a fresh mask allocated for each deferred argument.
IColumn::Filter cond_mask = getMaskFromColumn(arguments[i - 1].column);
maskedExecute(arguments[i], cond_mask);
}
/// New form: reuse `current_mask`.
maskedExecute(arguments[i], current_mask);
++i;
if (isColumnFunction(*arguments[i].column))
/// Old form: materialise an inverted copy of the running mask, then extend it below.
maskedExecute(arguments[i], reverseMask(mask), &default_value);
if (i != arguments.size() - 1)
disjunctionMasks(mask, getMaskFromColumn(arguments[i].column));
/// New form: pass reverse = true so the accumulator is used inverted without copying it.
maskedExecute(arguments[i], mask_disjunctions, true, &default_value);
++i;
}
}

View File

@ -367,6 +367,7 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon
column.column = column_function->reduce(true).column;
}
if (!action.arguments[i].needed_later)
arguments[i] = std::move(column);
else
@ -487,12 +488,10 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon
else
{
auto & column = inputs[pos];
if (!action.node->children.empty() && action.node->children.back()->type == ActionsDAG::ActionType::COLUMN_FUNCTION)
{
const ColumnFunction * column_function = typeid_cast<const ColumnFunction *>(column.column.get());
if (column_function)
column.column = column_function->reduce(true).column;
}
const ColumnFunction * column_function = typeid_cast<const ColumnFunction *>(column.column.get());
if (column_function && column.type->getTypeId() != TypeIndex::Function)
column.column = column_function->reduce(true).column;
columns[action.result_position] = std::move(column);
}