Small refactoring and adding comments

This commit is contained in:
Pavel Kruglov 2021-05-18 16:05:55 +03:00
parent d8f590febe
commit 62d4f4b25c
37 changed files with 290 additions and 332 deletions

View File

@ -283,7 +283,7 @@ void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start
}
ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_size_hint, bool inverse) const
{
size_t size = data.size();
if (size != filter.size())
@ -299,7 +299,7 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
for (size_t i = 0; i < size; ++i)
if (reverse ^ filter[i])
if (inverse ^ filter[i])
res_data.push_back(data[i]);
/// To save RAM in case of too strong filtering.
@ -309,9 +309,9 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
return res;
}
/// Expands the column's `data` according to `mask`.
/// All of the work is delegated to expandDataByMask<char *>, which receives `data`,
/// the mask and the inversion flag unchanged.
/// NOTE(review): the exact meaning of the inversion flag is defined by expandDataByMask,
/// which is not visible here — confirm there.
void ColumnAggregateFunction::expand(const Filter & mask, bool reverse)
void ColumnAggregateFunction::expand(const Filter & mask, bool inverse)
{
expandDataByMask<char *>(data, mask, reverse, nullptr);
expandDataByMask<char *>(data, mask, inverse);
}
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const

View File

@ -175,9 +175,9 @@ public:
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint, bool reverse) const override;
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool reverse) override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -534,31 +534,31 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
}
/// Filters the array column by `filt`, picking an implementation from the concrete type
/// of the nested `data` column:
///  - numeric nested columns (UInt8..UInt64, Int8..Int64, Float32, Float64) use filterNumber<T>;
///  - String, Tuple and Nullable nested columns use filterString, filterTuple and filterNullable;
///  - any other nested type falls back to filterGeneric.
/// `filt`, `result_size_hint` and the inversion flag are forwarded to the chosen helper unchanged.
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
if (typeid_cast<const ColumnUInt8 *>(data.get())) return filterNumber<UInt8>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return filterNumber<UInt16>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return filterNumber<UInt32>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt64 *>(data.get())) return filterNumber<UInt64>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt8 *>(data.get())) return filterNumber<Int8>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt16 *>(data.get())) return filterNumber<Int16>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt32 *>(data.get())) return filterNumber<Int32>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnInt64 *>(data.get())) return filterNumber<Int64>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint, reverse);
return filterGeneric(filt, result_size_hint, reverse);
if (typeid_cast<const ColumnUInt8 *>(data.get())) return filterNumber<UInt8>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnUInt16 *>(data.get())) return filterNumber<UInt16>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnUInt32 *>(data.get())) return filterNumber<UInt32>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnUInt64 *>(data.get())) return filterNumber<UInt64>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnInt8 *>(data.get())) return filterNumber<Int8>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnInt16 *>(data.get())) return filterNumber<Int16>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnInt32 *>(data.get())) return filterNumber<Int32>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnInt64 *>(data.get())) return filterNumber<Int64>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnFloat32 *>(data.get())) return filterNumber<Float32>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnFloat64 *>(data.get())) return filterNumber<Float64>(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnString *>(data.get())) return filterString(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnTuple *>(data.get())) return filterTuple(filt, result_size_hint, inverse);
if (typeid_cast<const ColumnNullable *>(data.get())) return filterNullable(filt, result_size_hint, inverse);
return filterGeneric(filt, result_size_hint, inverse);
}
/// Expands the array column according to `mask`.
/// Only the offsets (getOffsets()) are passed to expandOffsetsByMask; the nested `data`
/// column is not touched by this function.
void ColumnArray::expand(const IColumn::Filter & mask, bool reverse)
void ColumnArray::expand(const IColumn::Filter & mask, bool inverse)
{
expandOffsetsByMask(getOffsets(), mask, reverse);
expandOffsetsByMask(getOffsets(), mask, inverse);
}
template <typename T>
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
if (getOffsets().empty())
return ColumnArray::create(data);
@ -568,11 +568,11 @@ ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hin
auto & res_elems = assert_cast<ColumnVector<T> &>(res->getData()).getData();
Offsets & res_offsets = res->getOffsets();
filterArraysImpl<T>(assert_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint, reverse);
filterArraysImpl<T>(assert_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint, inverse);
return res;
}
ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
size_t col_size = getOffsets().size();
if (col_size != filt.size())
@ -610,7 +610,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
/// Number of rows in the array.
size_t array_size = src_offsets[i] - prev_src_offset;
if (reverse ^ filt[i])
if (inverse ^ filt[i])
{
/// If the array is not empty - copy content.
if (array_size)
@ -640,7 +640,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hin
return res;
}
ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
size_t size = getOffsets().size();
if (size != filt.size())
@ -652,7 +652,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
Filter nested_filt(getOffsets().back());
for (size_t i = 0; i < size; ++i)
{
if (reverse ^ filt[i])
if (inverse ^ filt[i])
memset(&nested_filt[offsetAt(i)], 1, sizeAt(i));
else
memset(&nested_filt[offsetAt(i)], 0, sizeAt(i));
@ -675,7 +675,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
size_t current_offset = 0;
for (size_t i = 0; i < size; ++i)
{
if (reverse ^ filt[i])
if (inverse ^ filt[i])
{
current_offset += sizeAt(i);
res_offsets.push_back(current_offset);
@ -685,7 +685,7 @@ ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hi
return res;
}
ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
if (getOffsets().empty())
return ColumnArray::create(data);
@ -693,13 +693,13 @@ ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_h
const ColumnNullable & nullable_elems = assert_cast<const ColumnNullable &>(*data);
auto array_of_nested = ColumnArray::create(nullable_elems.getNestedColumnPtr(), offsets);
auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint, reverse);
auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint, inverse);
const auto & filtered_array_of_nested = assert_cast<const ColumnArray &>(*filtered_array_of_nested_owner);
const auto & filtered_offsets = filtered_array_of_nested.getOffsetsPtr();
auto res_null_map = ColumnUInt8::create();
filterArraysImplOnlyData(nullable_elems.getNullMapData(), getOffsets(), res_null_map->getData(), filt, result_size_hint, reverse);
filterArraysImplOnlyData(nullable_elems.getNullMapData(), getOffsets(), res_null_map->getData(), filt, result_size_hint, inverse);
return ColumnArray::create(
ColumnNullable::create(
@ -708,7 +708,7 @@ ColumnPtr ColumnArray::filterNullable(const Filter & filt, ssize_t result_size_h
filtered_offsets);
}
ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
if (getOffsets().empty())
return ColumnArray::create(data);
@ -725,7 +725,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
Columns temporary_arrays(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
temporary_arrays[i] = ColumnArray(tuple.getColumns()[i]->assumeMutable(), getOffsetsPtr()->assumeMutable())
.filter(filt, result_size_hint, reverse);
.filter(filt, result_size_hint, inverse);
Columns tuple_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)

View File

@ -70,8 +70,8 @@ public:
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const Filter & mask, bool reverse) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
@ -174,12 +174,12 @@ private:
/// Specializations for the filter function.
template <typename T>
ColumnPtr filterNumber(const Filter & filt, ssize_t result_size_hint, bool reverse) const;
ColumnPtr filterNumber(const Filter & filt, ssize_t result_size_hint, bool inverse) const;
ColumnPtr filterString(const Filter & filt, ssize_t result_size_hint, bool reverse) const;
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint, bool reverse) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint, bool reverse) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint, bool reverse) const;
ColumnPtr filterString(const Filter & filt, ssize_t result_size_hint, bool inverse) const;
ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint, bool inverse) const;
ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint, bool inverse) const;
ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint, bool inverse) const;
int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const;

View File

@ -53,25 +53,25 @@ ColumnPtr ColumnConst::removeLowCardinality() const
return ColumnConst::create(data->convertToFullColumnIfLowCardinality(), s);
}
/// Filters a constant column. Since every row holds the same value, only the size changes:
/// the result is a new ColumnConst over the same `data` with the number of selected rows.
/// The size hint is ignored.
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH if the filter size differs from the column size `s`.
ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/, bool reverse) const
ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/, bool inverse) const
{
if (s != filt.size())
throw Exception("Size of filter (" + toString(filt.size()) + ") doesn't match size of column (" + toString(s) + ")",
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
/// Number of rows to keep; presumably the count of non-zero filter bytes — confirm against countBytesInFilter.
size_t new_size = countBytesInFilter(filt);
/// With inversion the complementary set of rows is kept instead.
if (reverse)
if (inverse)
new_size = filt.size() - new_size;
return ColumnConst::create(data, new_size);
}
void ColumnConst::expand(const Filter & mask, bool reverse)
void ColumnConst::expand(const Filter & mask, bool inverse)
{
if (mask.size() < s)
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
size_t bytes_count = countBytesInFilter(mask);
if (reverse)
if (inverse)
bytes_count = mask.size() - bytes_count;
if (bytes_count < s)

View File

@ -180,8 +180,8 @@ public:
data->updateHashFast(hash);
}
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const Filter & mask, bool reverse) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -293,7 +293,7 @@ void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
}
template <typename T>
ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse) const
{
size_t size = data.size();
if (size != filt.size())
@ -311,7 +311,7 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
while (filt_pos < filt_end)
{
if (reverse ^ *filt_pos)
if (inverse ^ *filt_pos)
res_data.push_back(*data_pos);
++filt_pos;
@ -322,9 +322,9 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
}
/// Expands the decimal column's `data` according to `mask` by delegating to
/// expandDataByMask<T>; the mask and the inversion flag are forwarded unchanged.
template <typename T>
void ColumnDecimal<T>::expand(const IColumn::Filter & mask, bool reverse)
void ColumnDecimal<T>::expand(const IColumn::Filter & mask, bool inverse)
{
expandDataByMask<T>(data, mask, reverse, T());
expandDataByMask<T>(data, mask, inverse);
}
template <typename T>

View File

@ -150,8 +150,8 @@ public:
UInt64 get64(size_t n) const override;
bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const IColumn::Filter & mask, bool reverse) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const IColumn::Filter & mask, bool inverse) override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -266,7 +266,7 @@ void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_
memcpy(chars.data() + old_size, &src_concrete.chars[start * n], length * n);
}
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse) const
{
size_t col_size = size();
if (col_size != filt.size())
@ -296,7 +296,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
while (filt_pos < filt_end_sse)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = reverse ? mask : ~mask;
mask = inverse ? mask : ~mask;
if (0 == mask)
{
@ -313,7 +313,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
size_t res_chars_size = res->chars.size();
for (size_t i = 0; i < SIMD_BYTES; ++i)
{
if (reverse ^ filt_pos[i])
if (inverse ^ filt_pos[i])
{
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
@ -330,7 +330,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
size_t res_chars_size = res->chars.size();
while (filt_pos < filt_end)
{
if (reverse ^ *filt_pos)
if (inverse ^ *filt_pos)
{
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
@ -344,7 +344,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
return res;
}
void ColumnFixedString::expand(const IColumn::Filter & mask, bool reverse)
void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverse)
{
if (mask.size() < size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
@ -354,7 +354,7 @@ void ColumnFixedString::expand(const IColumn::Filter & mask, bool reverse)
chars.resize_fill(mask.size() * n, 0);
while (index >= 0)
{
if (mask[index] ^ reverse)
if (mask[index] ^ inverse)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

View File

@ -145,9 +145,9 @@ public:
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const IColumn::Filter & mask, bool reverse) override;
void expand(const IColumn::Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -2,9 +2,15 @@
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnsCommon.h>
#include <Common/PODArray.h>
#include <Common/ProfileEvents.h>
#include <IO/WriteHelpers.h>
#include <Functions/IFunction.h>
namespace ProfileEvents
{
extern const Event FunctionExecute;
extern const Event CompiledFunctionExecute;
}
namespace DB
{
@ -15,8 +21,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
/// Constructs a function column of `size` rows around `function_`.
/// The flags are stored as given, and the columns in `columns_to_capture` are added
/// through appendArguments().
/// `is_short_circuit_argument_` marks the column as a lazily executed argument of a short-circuit function.
/// `is_function_compiled_` marks the function as compiled; it is used for profiling.
ColumnFunction::ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_)
: size_(size), function(function_), is_short_circuit_argument(is_short_circuit_argument_)
ColumnFunction::ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_, bool is_function_compiled_)
: size_(size), function(function_), is_short_circuit_argument(is_short_circuit_argument_), is_function_compiled(is_function_compiled_)
{
appendArguments(columns_to_capture);
}
@ -53,7 +59,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
return ColumnFunction::create(length, function, capture, is_short_circuit_argument);
}
ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
if (size_ != filt.size())
throw Exception("Size of filter (" + toString(filt.size()) + ") doesn't match size of column ("
@ -61,13 +67,13 @@ ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint,
ColumnsWithTypeAndName capture = captured_columns;
for (auto & column : capture)
column.column = column.column->filter(filt, result_size_hint, reverse);
column.column = column.column->filter(filt, result_size_hint, inverse);
size_t filtered_size = 0;
if (capture.empty())
{
filtered_size = countBytesInFilter(filt);
if (reverse)
if (inverse)
filtered_size = filt.size() - filtered_size;
}
else
@ -76,12 +82,12 @@ ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint,
return ColumnFunction::create(filtered_size, function, capture, is_short_circuit_argument);
}
void ColumnFunction::expand(const Filter & mask, bool reverse)
void ColumnFunction::expand(const Filter & mask, bool inverse)
{
for (auto & column : captured_columns)
{
column.column = column.column->cloneResized(column.column->size());
column.column->assumeMutable()->expand(mask, reverse);
column.column->assumeMutable()->expand(mask, inverse);
}
size_ = mask.size();
@ -217,6 +223,7 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
if (function->isShortCircuit())
function->executeShortCircuitArguments(columns);
/// Arguments of a lazily executed function can themselves be lazily executed.
const ColumnFunction * arg;
for (auto & col : columns)
{
@ -227,6 +234,10 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
ColumnWithTypeAndName res{nullptr, function->getResultType(), ""};
ProfileEvents::increment(ProfileEvents::FunctionExecute);
if (is_function_compiled)
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
res.column = function->execute(columns, res.type, size_);
return res;
}

View File

@ -25,7 +25,7 @@ class ColumnFunction final : public COWHelper<IColumn, ColumnFunction>
private:
friend class COWHelper<IColumn, ColumnFunction>;
ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_ = false);
ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_ = false, bool is_function_compiled_ = false);
public:
const char * getFamilyName() const override { return "Function"; }
@ -37,8 +37,8 @@ public:
ColumnPtr cut(size_t start, size_t length) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const Filter & mask, bool reverse) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -160,8 +160,16 @@ private:
size_t size_;
FunctionBasePtr function;
ColumnsWithTypeAndName captured_columns;
/// Whether this column is used as a lazily executed argument of a short-circuit function.
/// The flag is needed to distinguish a lazily executed argument from an ordinary
/// argument that merely holds a ColumnFunction column (some functions can return one).
/// See ExpressionActions.cpp for details.
bool is_short_circuit_argument;
/// Whether the passed function is compiled. Used for profiling.
bool is_function_compiled;
void appendArgument(const ColumnWithTypeAndName & column);
};

View File

@ -105,14 +105,14 @@ public:
void updateHashFast(SipHash &) const override;
/// Filters the column by filtering only its indexes. The result is a new
/// ColumnLowCardinality built from the same dictionary pointer and the filtered indexes.
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override
{
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint, reverse));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint, inverse));
}
/// Expands the column according to `mask` by expanding the positions column of `idx`;
/// the dictionary is not accessed here.
void expand(const Filter & mask, bool reverse) override
void expand(const Filter & mask, bool inverse) override
{
idx.getPositionsPtr()->expand(mask, reverse);
idx.getPositionsPtr()->expand(mask, inverse);
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override

View File

@ -143,15 +143,15 @@ void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length
start, length);
}
/// Filters the map column by filtering its `nested` column with the same arguments
/// and wrapping the result in a new ColumnMap.
ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
auto filtered = nested->filter(filt, result_size_hint, reverse);
auto filtered = nested->filter(filt, result_size_hint, inverse);
return ColumnMap::create(filtered);
}
/// Expands the map column according to `mask` by forwarding the call to the `nested` column.
void ColumnMap::expand(const IColumn::Filter & mask, bool reverse)
void ColumnMap::expand(const IColumn::Filter & mask, bool inverse)
{
nested->expand(mask, reverse);
nested->expand(mask, inverse);
}
ColumnPtr ColumnMap::permute(const Permutation & perm, size_t limit) const

View File

@ -63,8 +63,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const Filter & mask, bool reverse) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;

View File

@ -214,17 +214,17 @@ void ColumnNullable::popBack(size_t n)
getNullMapColumn().popBack(n);
}
/// Filters the nullable column: the nested column and the null map are each filtered
/// with the same filter, size hint and inversion flag, and the two results are combined
/// into a new ColumnNullable.
ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint, reverse);
ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint, reverse);
ColumnPtr filtered_data = getNestedColumn().filter(filt, result_size_hint, inverse);
ColumnPtr filtered_null_map = getNullMapColumn().filter(filt, result_size_hint, inverse);
return ColumnNullable::create(filtered_data, filtered_null_map);
}
/// Expands the nullable column according to `mask` by expanding both the nested column
/// and the null map with the same arguments.
void ColumnNullable::expand(const IColumn::Filter & mask, bool reverse)
void ColumnNullable::expand(const IColumn::Filter & mask, bool inverse)
{
nested_column->expand(mask, reverse);
null_map->expand(mask, reverse);
nested_column->expand(mask, inverse);
null_map->expand(mask, inverse);
}
ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const

View File

@ -87,8 +87,8 @@ public:
}
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const Filter & mask, bool reverse) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;

View File

@ -144,7 +144,7 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len
}
ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
if (offsets.empty())
return ColumnString::create();
@ -154,13 +154,13 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint, bo
Chars & res_chars = res->chars;
Offsets & res_offsets = res->offsets;
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint, reverse);
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint, inverse);
return res;
}
/// Expands the string column according to `mask`.
/// Only `offsets` are passed to expandOffsetsByMask; `chars` is not touched by this function.
void ColumnString::expand(const IColumn::Filter & mask, bool reverse)
void ColumnString::expand(const IColumn::Filter & mask, bool inverse)
{
expandOffsetsByMask(offsets, mask, reverse);
expandOffsetsByMask(offsets, mask, inverse);
}

View File

@ -210,9 +210,9 @@ public:
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool reverse) override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;

View File

@ -221,21 +221,21 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng
start, length);
}
/// Filters the tuple column element-wise: every element column is filtered with the same
/// filter, size hint and inversion flag, and a new ColumnTuple is created from the results.
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const
{
const size_t tuple_size = columns.size();
Columns new_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
new_columns[i] = columns[i]->filter(filt, result_size_hint, reverse);
new_columns[i] = columns[i]->filter(filt, result_size_hint, inverse);
return ColumnTuple::create(new_columns);
}
/// Expands the tuple column according to `mask` by expanding every element column
/// with the same arguments.
void ColumnTuple::expand(const Filter & mask, bool reverse)
void ColumnTuple::expand(const Filter & mask, bool inverse)
{
for (auto & column : columns)
column->expand(mask, reverse);
column->expand(mask, inverse);
}
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const

View File

@ -66,8 +66,8 @@ public:
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool reverse) const override;
void expand(const Filter & mask, bool reverse) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const Filter & mask, bool inverse) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;

View File

@ -345,7 +345,7 @@ void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
}
template <typename T>
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse) const
{
size_t size = data.size();
if (size != filt.size())
@ -375,7 +375,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end_sse)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = reverse ? mask : ~mask;
mask = inverse ? mask : ~mask;
if (0 == mask)
{
@ -388,7 +388,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
else
{
for (size_t i = 0; i < SIMD_BYTES; ++i)
if (reverse ^ filt_pos[i])
if (inverse ^ filt_pos[i])
res_data.push_back(data_pos[i]);
}
@ -399,7 +399,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
while (filt_pos < filt_end)
{
if (reverse ^ *filt_pos)
if (inverse ^ *filt_pos)
res_data.push_back(*data_pos);
++filt_pos;
@ -410,9 +410,9 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
}
/// Expands the vector column's `data` according to `mask` by delegating to
/// expandDataByMask<T>; the mask and the inversion flag are forwarded unchanged.
template <typename T>
void ColumnVector<T>::expand(const IColumn::Filter & mask, bool reverse)
void ColumnVector<T>::expand(const IColumn::Filter & mask, bool inverse)
{
expandDataByMask<T>(data, mask, reverse, T());
expandDataByMask<T>(data, mask, inverse);
}
template <typename T>

View File

@ -283,9 +283,9 @@ public:
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse) const override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse) const override;
void expand(const IColumn::Filter & mask, bool reverse) override;
void expand(const IColumn::Filter & mask, bool inverse) override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;

View File

@ -192,7 +192,7 @@ namespace
void filterArraysImplGeneric(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets * res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse)
const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse)
{
const size_t size = src_offsets.size();
if (size != filt.size())
@ -239,7 +239,7 @@ namespace
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
zero_vec));
mask = reverse ? mask : ~mask;
mask = inverse ? mask : ~mask;
if (mask == 0)
{
@ -263,7 +263,7 @@ namespace
else
{
for (size_t i = 0; i < SIMD_BYTES; ++i)
if (reverse ^ filt_pos[i])
if (inverse ^ filt_pos[i])
copy_array(offsets_pos + i);
}
@ -274,7 +274,7 @@ namespace
while (filt_pos < filt_end)
{
if (reverse ^ *filt_pos)
if (inverse ^ *filt_pos)
copy_array(offsets_pos);
++filt_pos;
@ -288,18 +288,18 @@ template <typename T>
void filterArraysImpl(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets & res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse)
const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse)
{
return filterArraysImplGeneric<T, ResultOffsetsBuilder>(src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint, reverse);
return filterArraysImplGeneric<T, ResultOffsetsBuilder>(src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint, inverse);
}
template <typename T>
void filterArraysImplOnlyData(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems,
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse)
const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse)
{
return filterArraysImplGeneric<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint, reverse);
return filterArraysImplGeneric<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint, inverse);
}

View File

@ -32,14 +32,14 @@ template <typename T>
void filterArraysImpl(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems, IColumn::Offsets & res_offsets,
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse);
const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse);
/// Same as above, but not fills res_offsets.
template <typename T>
void filterArraysImplOnlyData(
const PaddedPODArray<T> & src_elems, const IColumn::Offsets & src_offsets,
PaddedPODArray<T> & res_elems,
const IColumn::Filter & filt, ssize_t result_size_hint, bool reverse = false);
const IColumn::Filter & filt, ssize_t result_size_hint, bool inverse = false);
namespace detail
{

View File

@ -230,16 +230,20 @@ public:
/** Removes elements that don't match the filter.
* Is used in WHERE and HAVING operations.
* If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column;
* if 0, then don't makes reserve(),
* otherwise (i.e. < 0), makes reserve() using size of source column.
* if 0, then don't makes reserve(),
* otherwise (i.e. < 0), makes reserve() using size of source column.
* If inverse is true, inverted filter will be used.
*/
using Filter = PaddedPODArray<UInt8>;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint, bool reverse = false) const = 0;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint, bool inverse = false) const = 0;
virtual void expand(const Filter &, bool)
{
throw Exception("expand function is not implemented", ErrorCodes::NOT_IMPLEMENTED);
}
/** Expand column by mask inplace. After expanding column will
* satisfy the following: if we filter it by given mask, we will
* get initial column. Values with indexes i: mask[i] = 0
* shouldn't be used after expanding.
* If inverse is true, inverted mask will be used.
*/
virtual void expand(const Filter & /*mask*/, bool /*inverse*/) = 0;
/// Permutes elements using specified permutation. Is used in sorting.
/// limit - if it isn't 0, puts only first limit elements in the result.

View File

@ -98,18 +98,18 @@ public:
s += length;
}
ColumnPtr filter(const Filter & filt, ssize_t /*result_size_hint*/, bool reverse = false) const override
ColumnPtr filter(const Filter & filt, ssize_t /*result_size_hint*/, bool inverse = false) const override
{
size_t bytes = countBytesInFilter(filt);
if (reverse)
if (inverse)
bytes = filt.size() - bytes;
return cloneDummy(bytes);
}
void expand(const IColumn::Filter & mask, bool reverse) override
void expand(const IColumn::Filter & mask, bool inverse) override
{
size_t bytes = countBytesInFilter(mask);
if (reverse)
if (inverse)
bytes = mask.size() - bytes;
s = bytes;
}

View File

@ -14,7 +14,7 @@ namespace ErrorCodes
}
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool reverse, T default_value)
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverse)
{
if (mask.size() < data.size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
@ -24,7 +24,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
data.resize(mask.size());
while (index >= 0)
{
if (mask[index] ^ reverse)
if (mask[index] ^ inverse)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
@ -32,20 +32,17 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
data[index] = data[from];
--from;
}
else
data[index] = default_value;
--index;
}
if (from != -1)
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
/// Explicit instantiations - not to place the implementation of the function above in the header file.
#define INSTANTIATE(TYPE) \
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool, TYPE);
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool);
INSTANTIATE(UInt8)
INSTANTIATE(UInt16)
@ -71,7 +68,7 @@ INSTANTIATE(UUID)
#undef INSTANTIATE
void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<UInt8> & mask, bool reverse)
void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<UInt8> & mask, bool inverse)
{
if (mask.size() < offsets.size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
@ -82,7 +79,7 @@ void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<
UInt64 prev_offset = offsets[from];
while (index >= 0)
{
if (mask[index] ^ reverse)
if (mask[index] ^ inverse)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
@ -100,33 +97,35 @@ void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
void expandColumnByMask(const ColumnPtr & column, const PaddedPODArray<UInt8>& mask, bool reverse)
void expandColumnByMask(const ColumnPtr & column, const PaddedPODArray<UInt8>& mask, bool inverse)
{
column->assumeMutable()->expand(mask, reverse);
column->assumeMutable()->expand(mask, inverse);
}
void getMaskFromColumn(
const ColumnPtr & column,
PaddedPODArray<UInt8> & res,
bool reverse,
const PaddedPODArray<UInt8> * expanding_mask,
UInt8 default_value,
bool expanding_mask_reverse,
const PaddedPODArray<UInt8> * null_bytemap,
UInt8 null_value)
bool inverse,
const PaddedPODArray<UInt8> * mask_used_in_expanding,
UInt8 default_value_in_expanding,
bool inverse_mask_used_in_expanding,
UInt8 null_value,
const PaddedPODArray<UInt8> * null_bytemap)
{
if (const auto * col = checkAndGetColumn<ColumnNothing>(*column))
{
res.resize_fill(col->size(), reverse ? !null_value : null_value);
res.resize_fill(col->size(), inverse ? !null_value : null_value);
return;
}
if (const auto * col = checkAndGetColumn<ColumnNullable>(*column))
{
const PaddedPODArray<UInt8> & null_map = checkAndGetColumn<ColumnUInt8>(*col->getNullMapColumnPtr())->getData();
return getMaskFromColumn(col->getNestedColumnPtr(), res, reverse, expanding_mask, default_value, expanding_mask_reverse, &null_map, null_value);
return getMaskFromColumn(col->getNestedColumnPtr(), res, inverse, mask_used_in_expanding, default_value_in_expanding, inverse_mask_used_in_expanding, null_value, &null_map);
}
/// Some columns don't implement getBool() method and we cannot
/// convert them to mask, throw an exception in this case.
try
{
if (res.size() != column->size())
@ -134,12 +133,12 @@ void getMaskFromColumn(
for (size_t i = 0; i != column->size(); ++i)
{
if (expanding_mask && (!(*expanding_mask)[i] ^ expanding_mask_reverse))
res[i] = reverse ? !default_value : default_value;
if (mask_used_in_expanding && (!(*mask_used_in_expanding)[i] ^ inverse_mask_used_in_expanding))
res[i] = inverse ? !default_value_in_expanding : default_value_in_expanding;
else if (null_bytemap && (*null_bytemap)[i])
res[i] = reverse ? !null_value : null_value;
res[i] = inverse ? !null_value : null_value;
else
res[i] = reverse ? !column->getBool(i): column->getBool(i);
res[i] = inverse ? !column->getBool(i): column->getBool(i);
}
}
catch (...)
@ -148,35 +147,24 @@ void getMaskFromColumn(
}
}
/// Combine two masks of equal size element by element: mask1[i] = operation(mask1[i], mask2[i]).
/// The result is written into mask1. Throws LOGICAL_ERROR if the masks have different sizes.
template <typename Op>
void binaryMasksOperationImpl(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2, Op operation)
{
    const size_t rows = mask1.size();
    if (rows != mask2.size())
        throw Exception("Masks have different sizes", ErrorCodes::LOGICAL_ERROR);

    for (size_t row = 0; row < rows; ++row)
        mask1[row] = operation(mask1[row], mask2[row]);
}
/// Make a conjunction of two masks and write the result into the first one (mask1 = mask1 & mask2).
void conjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2)
{
    const auto bitwise_and = [](const auto & left, const auto & right) { return left & right; };
    binaryMasksOperationImpl(mask1, mask2, bitwise_and);
}
void disjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2)
{
binaryMasksOperationImpl(mask1, mask2, [](const auto & lhs, const auto & rhs){ return lhs | rhs; });
if (mask1.size() != mask2.size())
throw Exception("Cannot make a disjunction of masks, they have different sizes", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i != mask1.size(); ++i)
mask1[i] = mask1[i] | mask2[i];
}
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, bool reverse)
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, bool inverse)
{
const auto * column_function = checkAndGetColumn<ColumnFunction>(*column.column);
if (!column_function || !column_function->isShortCircuitArgument())
return;
auto filtered = column_function->filter(mask, -1, reverse);
auto filtered = column_function->filter(mask, -1, inverse);
auto result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
expandColumnByMask(result.column, mask, reverse);
expandColumnByMask(result.column, mask, inverse);
column = std::move(result);
}

View File

@ -7,29 +7,47 @@
namespace DB
{
/// Expand data by mask. After expanding data will satisfy the following: if we filter data
/// by given mask, we get initial data. Values in places where mask[i] = 0 are unspecified and shouldn't be used.
/// If inverse is true, we will work with inverted mask. This function is used in implementations of
/// expand() method in IColumn interface.
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool reverse, T default_value);
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverse);
void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<UInt8> & mask, bool reverse);
/// Expand offsets by mask. Used in expand() method in ColumnArray and ColumnString to expand their offsets.
/// In places where mask[i] = 0 we insert empty array/string.
void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<UInt8> & mask, bool inverse);
/// Convert given column to mask. If inverse is true, we will use inverted values.
/// Usually this function is used after expanding column where we cannot specify default value
/// for places where mask[i] = 0, but sometimes we want it (to reduce unnecessary copying).
/// If mask_used_in_expanding is passed, we will use default_value_in_expanding instead of
/// value from column when mask_used_in_expanding[i] = 0. If inverse_mask_used_in_expanding
/// is true, we will work with inverted mask_used_in_expanding.
/// If column is nullable, null_value will be used when column value is Null.
void getMaskFromColumn(
const ColumnPtr & column,
PaddedPODArray<UInt8> & res,
bool reverse = false,
const PaddedPODArray<UInt8> * expanding_mask = nullptr,
UInt8 default_value = 1,
bool expanding_mask_reverse = false,
const PaddedPODArray<UInt8> * null_bytemap = nullptr,
UInt8 null_value = 1);
void conjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2);
bool inverse = false,
const PaddedPODArray<UInt8> * mask_used_in_expanding = nullptr,
UInt8 default_value_in_expanding = 1,
bool inverse_mask_used_in_expanding = false,
UInt8 null_value = 1,
const PaddedPODArray<UInt8> * null_bytemap = nullptr);
/// Make a disjunction of two masks and write result in the first one (mask1 = mask1 | mask2).
void disjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2);
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, bool reverse = false);
/// If given column is lazy executed argument (ColumnFunction with isShortCircuitArgument() = true),
/// filter it by mask, reduce and then expand by mask. If inverse is true, we will work with inverted mask.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, bool inverse = false);
/// If given column is lazy executed argument, just reduce it.
void executeColumnIfNeeded(ColumnWithTypeAndName & column);
/// Check if arguments contain lazy executed argument. If contain, return index of the last one,
/// otherwise return -1.
int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments);
}

View File

@ -518,18 +518,26 @@ void FunctionAnyArityLogical<Impl, Name>::executeShortCircuitArguments(ColumnsWi
if (last_short_circuit_argument_index < 0)
return;
bool reverse = Name::name != NameAnd::name;
/// In AND (OR) function we need to execute the next argument
/// only if all previous ones are true (false). We will filter the next
/// argument by conjunction (inverted disjunction) of all previous ones.
/// To not make conjunction (inverted disjunction) every iteration, we will use
/// default_value_in_expanding = 0 (1) while converting column to mask,
/// so after converting we will get needed conjunction (inverted disjunction).
/// Set null_value according to ternary logic.
UInt8 null_value = Name::name == NameAnd::name ? 1 : 0;
UInt8 value_for_mask_expanding = Name::name == NameAnd::name ? 0 : 1;
bool inverse = Name::name != NameAnd::name;
UInt8 default_value_in_expanding = Name::name == NameAnd::name ? 0 : 1;
executeColumnIfNeeded(arguments[0]);
IColumn::Filter mask;
IColumn::Filter * expanding_mask = nullptr;
IColumn::Filter * mask_used_in_expanding = nullptr;
for (int i = 1; i <= last_short_circuit_argument_index; ++i)
{
getMaskFromColumn(arguments[i - 1].column, mask, reverse, expanding_mask, value_for_mask_expanding, false, nullptr, null_value);
maskedExecute(arguments[i], mask, false);
expanding_mask = &mask;
getMaskFromColumn(arguments[i - 1].column, mask, inverse, mask_used_in_expanding, default_value_in_expanding, false, null_value);
maskedExecute(arguments[i], mask);
mask_used_in_expanding = &mask;
}
}

View File

@ -211,10 +211,24 @@ public:
*/
virtual bool hasInformationAboutMonotonicity() const { return false; }
/** Function is called "short-circuit" if its arguments can be evaluated lazily
* (examples: and, or, if, multiIf). If function is short circuit, it must
* implement method executeShortCircuitArguments for lazy arguments execution,
* this method will be called before function execution.
*/
virtual bool isShortCircuit() const { return false; }
/** Should we evaluate this function lazily in short-circuit function arguments?
* If function can throw an exception or it's computationally heavy, then
* it's suitable, otherwise it's not (due to the overhead of lazy execution).
* Suitability may depend on function arguments.
*/
virtual bool isSuitableForShortCircuitArgumentsExecution(ColumnsWithTypeAndName & /*arguments*/) const = 0;
/** Method for lazy arguments execution in short-circuit functions.
* Lazy argument is presented as ColumnFunction with isShortCircuitArgument() = true.
* This method is called before function execution.
*/
virtual void executeShortCircuitArguments(ColumnsWithTypeAndName & /*arguments*/) const
{
throw Exception("Function " + getName() + " doesn't support short circuit execution", ErrorCodes::NOT_IMPLEMENTED);
@ -277,8 +291,10 @@ public:
/// Override and return true if function could take different number of arguments.
virtual bool isVariadic() const { return false; }
/// Override and return true if function is short-circuit.
virtual bool isShortCircuit() const { return false; }
/// Override and return true if function is suitable for lazy execution in short-circuit function arguments.
virtual bool isSuitableForShortCircuitArgumentsExecution(ColumnsWithTypeAndName & /*arguments*/) const = 0;
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
@ -391,7 +407,7 @@ public:
*/
virtual bool canBeExecutedOnDefaultArguments() const { return true; }
/// Properties from IFunctionBase (see IFunction.h)
/// Properties from IFunctionBase
virtual bool isSuitableForConstantFolding() const { return true; }
virtual ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr & /*result_type*/) const { return nullptr; }
virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const { return false; }

View File

@ -1,136 +0,0 @@
#pragma once
#include <Functions/IFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
}
/// Old function interface. Check documentation in IFunction.h
/// Most properties have a default implementation; a concrete function has to implement at least
/// getName(), executeImpl(), isSuitableForShortCircuitArgumentsExecution() and getNumberOfArguments().
class IFunction
{
public:
virtual ~IFunction() = default;
/// Name of the function. It is used in the messages of exceptions thrown by the default implementations below.
virtual String getName() const = 0;
/// Execute the function on the given arguments and return the result column.
virtual ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const = 0;
/// Dry-run variant of executeImpl(). By default it simply forwards to executeImpl().
virtual ColumnPtr executeImplDryRun(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
{
return executeImpl(arguments, result_type, input_rows_count);
}
/** Default implementation in presence of Nullable arguments or NULL constants as arguments is the following:
* if some of arguments are NULL constants then return NULL constant,
* if some of arguments are Nullable, then execute function as usual for columns,
* where Nullable columns are substituted with nested columns (they have arbitrary values in rows corresponding to NULL value)
* and wrap result in Nullable column where NULLs are in all rows where any of arguments are NULL.
*/
virtual bool useDefaultImplementationForNulls() const { return true; }
/** If the function has non-zero number of arguments,
* and if all arguments are constant, then we could automatically provide default implementation:
* arguments are converted to ordinary columns with single value, then function is executed as usual,
* and then the result is converted to constant column.
*/
virtual bool useDefaultImplementationForConstants() const { return false; }
/** If function arguments has single low cardinality column and all other arguments are constants, call function on nested column.
* Otherwise, convert all low cardinality columns to ordinary columns.
* Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality.
*/
virtual bool useDefaultImplementationForLowCardinalityColumns() const { return true; }
/// If it isn't, will convert all ColumnLowCardinality arguments to full columns.
virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; }
/** Some arguments could remain constant during this implementation.
*/
virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; }
/** True if function can be called on default arguments (include Nullable's) and won't throw.
* Counterexample: modulo(0, 0)
*/
virtual bool canBeExecutedOnDefaultArguments() const { return true; }
/// Properties from IFunctionBase (see IFunction.h)
virtual bool isSuitableForConstantFolding() const { return true; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & /*arguments*/) const { return nullptr; }
virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const { return false; }
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool isStateful() const { return false; }
/// Override and return true if function is short-circuit. A function is not short-circuit by default.
virtual bool isShortCircuit() const { return false; }
/// Return true if function is suitable for lazy execution in short-circuit function arguments.
/// There is no default: every function has to answer this explicitly.
virtual bool isSuitableForShortCircuitArgumentsExecution(ColumnsWithTypeAndName & /*arguments*/) const = 0;
/// Lazy execution of arguments for short-circuit functions. The default implementation throws NOT_IMPLEMENTED.
virtual void executeShortCircuitArguments(ColumnsWithTypeAndName & /*arguments*/) const
{
throw Exception("Function " + getName() + " doesn't support short circuit execution", ErrorCodes::NOT_IMPLEMENTED);
}
/// False by default; the default getMonotonicityForRange() below throws NOT_IMPLEMENTED.
virtual bool hasInformationAboutMonotonicity() const { return false; }
using Monotonicity = IFunctionBase::Monotonicity;
virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const
{
throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED);
}
/// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
virtual size_t getNumberOfArguments() const = 0;
/// Get the result type by argument types only. The default implementation throws NOT_IMPLEMENTED.
virtual DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const
{
throw Exception("getReturnType is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Get the result type by argument type. If the function does not apply to these arguments, throw an exception.
/// The default implementation extracts the types of the arguments and calls the overload taking DataTypes.
virtual DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
{
DataTypes data_types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
return getReturnTypeImpl(data_types);
}
virtual bool isVariadic() const { return false; }
/// The default implementation throws ILLEGAL_TYPE_OF_ARGUMENT: lambda-expressions as arguments
/// are rejected unless this method is overridden.
virtual void getLambdaArgumentTypes(DataTypes & /*arguments*/) const
{
throw Exception("Function " + getName() + " can't have lambda-expressions as arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
virtual ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t /*number_of_arguments*/) const { return {}; }
/// JIT compilation interface; declared only when USE_EMBEDDED_COMPILER is set.
#if USE_EMBEDDED_COMPILER
bool isCompilable(const DataTypes & arguments) const;
llvm::Value * compile(llvm::IRBuilderBase &, const DataTypes & arguments, Values values) const;
#endif
protected:
#if USE_EMBEDDED_COMPILER
/// By default a function is not JIT-compilable: isCompilableImpl() returns false
/// and compileImpl() throws NOT_IMPLEMENTED.
virtual bool isCompilableImpl(const DataTypes &) const { return false; }
virtual llvm::Value * compileImpl(llvm::IRBuilderBase &, const DataTypes &, Values) const
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
#endif
};
using FunctionPtr = std::shared_ptr<IFunction>;
}

View File

@ -928,12 +928,13 @@ public:
return;
executeColumnIfNeeded(arguments[0]);
/// Create mask only if it's needed.
if (last_short_circuit_argument_index > 0)
{
IColumn::Filter mask;
getMaskFromColumn(arguments[0].column, mask);
maskedExecute(arguments[1], mask);
maskedExecute(arguments[2], mask, /*reverse=*/true);
maskedExecute(arguments[2], mask, /*inverse=*/true);
}
}

View File

@ -115,6 +115,14 @@ public:
if (last_short_circuit_argument_index < 0)
return;
/// In multiIf we should execute the next condition only
/// if all previous ones are false. So, we will filter
/// the next condition by inverted disjunction of previous ones.
/// The next expression should be executed only if its condition is
/// true and all previous conditions are false. So, we will
/// use default_value_in_expanding = 0 while extracting mask from
/// executed condition and filter expression by this mask.
executeColumnIfNeeded(arguments[0]);
IColumn::Filter current_mask;
IColumn::Filter mask_disjunctions = IColumn::Filter(arguments[0].column->size(), 0);

View File

@ -67,10 +67,14 @@ public:
using NodeRawPtrs = std::vector<Node *>;
using NodeRawConstPtrs = std::vector<const Node *>;
/// States for lazy function execution in short-circuit function arguments.
enum class LazyExecution
{
/// Don't execute lazily.
DISABLED,
/// Execute lazily if it's possible (additional checks are required).
ENABLED,
/// Always execute lazily.
FORCE_ENABLED,
};
@ -97,6 +101,8 @@ public:
/// For COLUMN node and propagated constants.
ColumnPtr column;
/// Determine if this action should be executed lazily. If it should and the action type is FUNCTION, then the function
/// won't be executed and will be stored with its arguments in ColumnFunction with isShortCircuitArgument() = true.
LazyExecution lazy_execution = LazyExecution::DISABLED;
void toTree(JSONBuilder::JSONMap & map) const;

View File

@ -74,6 +74,7 @@ void ExpressionActions::rewriteShortCircuitArguments(const ActionsDAG::NodeRawCo
{
for (const auto * child : children)
{
/// Skip actions that are needed outside or have already been rewritten.
if (!need_outside.contains(child) || need_outside.at(child) || child->lazy_execution != ActionsDAG::LazyExecution::DISABLED)
continue;
@ -83,6 +84,7 @@ void ExpressionActions::rewriteShortCircuitArguments(const ActionsDAG::NodeRawCo
rewriteShortCircuitArguments(child->children, need_outside, force_rewrite);
const_cast<ActionsDAG::Node *>(child)->lazy_execution = force_rewrite ? ActionsDAG::LazyExecution::FORCE_ENABLED : ActionsDAG::LazyExecution::ENABLED;
break;
/// Propagate lazy execution through aliases.
case ActionsDAG::ActionType::ALIAS:
rewriteShortCircuitArguments(child->children, need_outside, force_rewrite);
break;
@ -102,6 +104,11 @@ void ExpressionActions::rewriteArgumentsForShortCircuitFunctions(
{
if (node.type == ActionsDAG::ActionType::FUNCTION && node.function_base->isShortCircuit())
{
/// We should enable lazy execution for all actions that are used only in arguments of
/// short-circuit function. To determine if an action is used somewhere else we use
/// BFS, started from action with short-circuit function. If an action has a parent that we haven't
/// visited earlier, it means that this action is used somewhere else. After BFS we will
/// have map need_outside: node -> is this node used somewhere else.
std::unordered_map<const ActionsDAG::Node *, bool> need_outside;
std::deque<const ActionsDAG::Node *> queue;
for (const auto * child : node.children)
@ -112,13 +119,15 @@ void ExpressionActions::rewriteArgumentsForShortCircuitFunctions(
{
const ActionsDAG::Node * cur = queue.front();
queue.pop_front();
/// If we've already visited this action, just continue.
if (need_outside.contains(cur))
continue;
bool is_need_outside = false;
/// If action is used in result, we can't enable lazy execution.
if (data[reverse_index.at(cur)].used_in_result)
need_outside[cur] = true;
is_need_outside = true;
else
{
bool is_need_outside = false;
for (const auto * parent : data[reverse_index.at(cur)].parents)
{
if (!need_outside.contains(parent) || need_outside[parent])
@ -127,13 +136,24 @@ void ExpressionActions::rewriteArgumentsForShortCircuitFunctions(
break;
}
}
need_outside[cur] = is_need_outside;
}
need_outside[cur] = is_need_outside;
for (const auto * child : cur->children)
queue.push_back(child);
/// If this action is needed outside, all its descendants are also needed outside
/// and we don't have to add them to the queue (if action is not in need_outside we
/// treat it as it's needed outside).
if (!is_need_outside)
{
for (const auto * child : cur->children)
queue.push_back(child);
}
}
/// If short-circuit function has only one argument, then we don't have to
/// evaluate this argument at all (example: toTypeName). In this case we
/// use LazyExecution::FORCE_ENABLED state.
bool force_rewrite = (node.children.size() == 1);
/// Recursively enable lazy execution for actions that
/// aren't needed outside short-circuit function arguments.
rewriteShortCircuitArguments(node.children, need_outside, force_rewrite);
}
}
@ -415,6 +435,9 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon
arguments[i] = columns[action.arguments[i].pos];
}
/// Use lazy execution when:
/// - It's force enabled.
/// - It's enabled and function is suitable for lazy execution or it has lazy executed arguments.
if (action.node->lazy_execution == ActionsDAG::LazyExecution::FORCE_ENABLED
|| (action.node->lazy_execution == ActionsDAG::LazyExecution::ENABLED
&& (action.node->function_base->isSuitableForShortCircuitArgumentsExecution(arguments) || checkShirtCircuitArguments(arguments) >= 0)))

View File

@ -133,9 +133,12 @@ private:
void checkLimits(const ColumnsWithTypeAndName & columns) const;
void linearizeActions();
/// Enable lazy execution for short-circuit function arguments.
void rewriteShortCircuitArguments(
const ActionsDAG::NodeRawConstPtrs & children, const std::unordered_map<const ActionsDAG::Node *, bool> & need_outside, bool force_rewrite);
/// Find short-circuit functions in actions and enable lazy execution for actions that are used in their arguments.
void rewriteArgumentsForShortCircuitFunctions(
const std::list<ActionsDAG::Node> & nodes,
const std::vector<Data> & data,