ClickHouse/src/Columns/MaskOperations.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

340 lines
9.9 KiB
C++
Raw Normal View History

2021-04-22 15:14:58 +00:00
#include <Columns/MaskOperations.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnsCommon.h>
2021-06-22 16:21:23 +00:00
#include <Columns/ColumnConst.h>
#include <Columns/ColumnLowCardinality.h>
#include <algorithm>
2021-04-22 15:14:58 +00:00
namespace DB
{
/// Error codes thrown by the functions in this file; the constants themselves are defined elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int LOGICAL_ERROR;
}
/// Stretches `data` in place to `mask.size()` elements.
/// A position is "selected" when its mask byte is non-zero (or zero, if `inverted` is set).
/// Selected positions receive the original elements of `data` in their original order,
/// all other positions receive a value-initialized `T()`.
/// Throws LOGICAL_ERROR if the mask is shorter than the data or if the number of
/// selected positions differs from the original number of elements.
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted)
{
    if (mask.size() < data.size())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mask size should be no less than data size.");

    /// Index of the last original element that has not been placed yet (-1 when none is left).
    ssize_t src = static_cast<ssize_t>(data.size()) - 1;
    data.resize(mask.size());

    /// Fill from the back: the destination index is never smaller than the source index,
    /// so an original element is never overwritten before it has been moved.
    for (ssize_t dst = static_cast<ssize_t>(mask.size()) - 1; dst >= 0; --dst)
    {
        const bool selected = (mask[dst] != 0) != inverted;
        if (!selected)
        {
            data[dst] = T();
            continue;
        }

        if (src < 0)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many bytes in mask");

        /// The element may already be in its final place.
        if (dst != src)
            data[dst] = data[src];

        --src;
    }

    if (src != -1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough bytes in mask");
}
/// Explicit instantiations of expandDataByMask, so that the template definition above
/// does not have to be placed in the header file.
#define INSTANTIATE_EXPAND_DATA_BY_MASK(TYPE) \
    template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool);

/// Unsigned integers.
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt8)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt16)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt32)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt64)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt128)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt256)

/// Signed integers.
INSTANTIATE_EXPAND_DATA_BY_MASK(Int8)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int16)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int32)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int64)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int128)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int256)

/// Floating point numbers.
INSTANTIATE_EXPAND_DATA_BY_MASK(Float32)
INSTANTIATE_EXPAND_DATA_BY_MASK(Float64)

/// Decimals and DateTime64.
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal32)
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal64)
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal128)
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal256)
INSTANTIATE_EXPAND_DATA_BY_MASK(DateTime64)

/// Remaining element types.
INSTANTIATE_EXPAND_DATA_BY_MASK(char *)
INSTANTIATE_EXPAND_DATA_BY_MASK(UUID)
INSTANTIATE_EXPAND_DATA_BY_MASK(IPv4)
INSTANTIATE_EXPAND_DATA_BY_MASK(IPv6)

#undef INSTANTIATE_EXPAND_DATA_BY_MASK
2021-06-22 16:21:23 +00:00
template <bool inverted, bool column_is_short, typename Container>
size_t extractMaskNumericImpl(
2021-08-10 11:31:15 +00:00
PaddedPODArray<UInt8> & mask,
2021-06-22 16:21:23 +00:00
const Container & data,
2021-05-18 13:05:55 +00:00
UInt8 null_value,
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls)
2021-04-22 15:14:58 +00:00
{
2022-03-22 19:15:05 +00:00
if constexpr (!column_is_short)
{
if (data.size() != mask.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
}
2022-03-23 22:15:07 +00:00
2021-06-22 16:21:23 +00:00
size_t ones_count = 0;
size_t data_index = 0;
size_t mask_size = mask.size();
size_t data_size = data.size();
size_t i = 0;
for (; i != mask_size && data_index != data_size; ++i)
2021-04-22 15:14:58 +00:00
{
2021-08-10 11:31:15 +00:00
// Change mask only where value is 1.
if (!mask[i])
continue;
UInt8 value;
size_t index;
if constexpr (column_is_short)
2021-06-22 16:21:23 +00:00
{
2021-08-10 11:31:15 +00:00
index = data_index;
++data_index;
2021-06-22 16:21:23 +00:00
}
2021-08-10 11:31:15 +00:00
else
index = i;
if (null_bytemap && (*null_bytemap)[index])
{
value = null_value;
if (nulls)
(*nulls)[i] = 1;
}
else
2022-09-10 02:07:51 +00:00
value = static_cast<bool>(data[index]);
2021-08-10 11:31:15 +00:00
if constexpr (inverted)
value = !value;
2021-06-22 16:21:23 +00:00
if (value)
++ones_count;
2021-08-10 11:31:15 +00:00
mask[i] = value;
2021-04-22 15:14:58 +00:00
}
2022-03-22 19:15:05 +00:00
if constexpr (column_is_short)
{
2022-03-23 10:39:38 +00:00
if (data_index != data_size)
2022-03-22 19:15:05 +00:00
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
}
2022-03-23 22:15:07 +00:00
2021-06-22 16:21:23 +00:00
return ones_count;
}
template <bool inverted, typename NumericType>
bool extractMaskNumeric(
2021-08-10 11:31:15 +00:00
PaddedPODArray<UInt8> & mask,
2021-06-22 16:21:23 +00:00
const ColumnPtr & column,
UInt8 null_value,
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls,
MaskInfo & mask_info)
{
const auto * numeric_column = checkAndGetColumn<ColumnVector<NumericType>>(column.get());
if (!numeric_column)
return false;
2021-04-22 15:14:58 +00:00
2021-06-22 16:21:23 +00:00
const auto & data = numeric_column->getData();
size_t ones_count;
if (column->size() < mask.size())
2021-08-10 11:31:15 +00:00
ones_count = extractMaskNumericImpl<inverted, true>(mask, data, null_value, null_bytemap, nulls);
2021-06-22 16:21:23 +00:00
else
2021-08-10 11:31:15 +00:00
ones_count = extractMaskNumericImpl<inverted, false>(mask, data, null_value, null_bytemap, nulls);
2021-06-10 17:47:34 +00:00
2021-06-22 16:21:23 +00:00
mask_info.has_ones = ones_count > 0;
mask_info.has_zeros = ones_count != mask.size();
return true;
}
/// Refines `mask` with a column that holds one and the same value in every row.
/// The value is `null_value` when the column is only-NULL (then every byte of `nulls`,
/// if provided, is set to 1), otherwise it is the boolean value of row 0.
/// With `inverted` the value is logically negated.
/// A zero value clears the whole mask; a non-zero value leaves the mask as it is.
template <bool inverted>
MaskInfo extractMaskFromConstOrNull(
    PaddedPODArray<UInt8> & mask,
    const ColumnPtr & column,
    UInt8 null_value,
    PaddedPODArray<UInt8> * nulls = nullptr)
{
    const bool is_only_null = column->onlyNull();
    if (is_only_null && nulls)
        std::fill(nulls->begin(), nulls->end(), 1);

    UInt8 value = is_only_null ? null_value : static_cast<UInt8>(column->getBool(0));

    if constexpr (inverted)
        value = !value;

    if (!value)
    {
        /// Nothing passes the condition: no ones remain, and zeros exist iff the mask is not empty.
        std::fill(mask.begin(), mask.end(), 0);
        return {.has_ones = false, .has_zeros = !mask.empty()};
    }

    /// Everything passes the condition, so the mask is unchanged and only has to be counted.
    const size_t ones_count = countBytesInFilter(mask);
    return {.has_ones = ones_count > 0, .has_zeros = ones_count != mask.size()};
}
2021-06-22 16:21:23 +00:00
/// Common implementation behind extractMask / extractInvertedMask.
/// LowCardinality columns are converted to full columns first. Then, in this order:
///  - only-NULL and Const columns are handled by extractMaskFromConstOrNull;
///  - Nullable columns recurse into the nested column with their null map as `null_bytemap`;
///  - otherwise the column must be a ColumnVector of one of the numeric types tried below.
/// Throws ILLEGAL_COLUMN if none of the cases applies.
template <bool inverted>
MaskInfo extractMaskImpl(
    PaddedPODArray<UInt8> & mask,
    const ColumnPtr & col,
    UInt8 null_value,
    const PaddedPODArray<UInt8> * null_bytemap,
    PaddedPODArray<UInt8> * nulls = nullptr)
{
    auto column = col->convertToFullColumnIfLowCardinality();

    /// Special implementation for Null and Const columns.
    const bool is_const_or_null = column->onlyNull() || checkAndGetColumn<ColumnConst>(*column);
    if (is_const_or_null)
        return extractMaskFromConstOrNull<inverted>(mask, column, null_value, nulls);

    if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(*column))
    {
        const PaddedPODArray<UInt8> & null_map = nullable_column->getNullMapData();
        return extractMaskImpl<inverted>(mask, nullable_column->getNestedColumnPtr(), null_value, &null_map, nulls);
    }

    /// Try the supported numeric types one by one; the first matching one does the work.
    MaskInfo mask_info;
    const bool extracted
        = extractMaskNumeric<inverted, UInt8>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, UInt16>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, UInt32>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, UInt64>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, Int8>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, Int16>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, Int32>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, Int64>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, Float32>(mask, column, null_value, null_bytemap, nulls, mask_info)
        || extractMaskNumeric<inverted, Float64>(mask, column, null_value, null_bytemap, nulls, mask_info);

    if (!extracted)
        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot convert column {} to mask.", column->getName());

    return mask_info;
}
2021-06-22 16:21:23 +00:00
/// Refines `mask` with the values of `column` (NULLs are replaced with `null_value`)
/// and returns whether the resulting mask has ones and zeros.
MaskInfo extractMask(
    PaddedPODArray<UInt8> & mask,
    const ColumnPtr & column,
    UInt8 null_value)
{
    constexpr bool inverted = false;
    return extractMaskImpl<inverted>(mask, column, null_value, /*null_bytemap=*/ nullptr);
}
2021-04-22 15:14:58 +00:00
2021-08-10 11:31:15 +00:00
/// Same as extractMask, but the values taken from `column` are logically negated.
MaskInfo extractInvertedMask(
    PaddedPODArray<UInt8> & mask,
    const ColumnPtr & column,
    UInt8 null_value)
{
    constexpr bool inverted = true;
    return extractMaskImpl<inverted>(mask, column, null_value, /*null_bytemap=*/ nullptr);
}
2021-08-10 11:31:15 +00:00
/// Overload of extractMask that additionally forwards `nulls`, which the implementation
/// marks with 1 for rows whose source value is NULL.
MaskInfo extractMask(
    PaddedPODArray<UInt8> & mask,
    const ColumnPtr & column,
    PaddedPODArray<UInt8> * nulls,
    UInt8 null_value)
{
    constexpr bool inverted = false;
    return extractMaskImpl<inverted>(mask, column, null_value, /*null_bytemap=*/ nullptr, nulls);
}
2021-08-10 11:31:15 +00:00
/// Overload of extractInvertedMask that additionally forwards `nulls`, which the implementation
/// marks with 1 for rows whose source value is NULL.
MaskInfo extractInvertedMask(
    PaddedPODArray<UInt8> & mask,
    const ColumnPtr & column,
    PaddedPODArray<UInt8> * nulls,
    UInt8 null_value)
{
    constexpr bool inverted = true;
    return extractMaskImpl<inverted>(mask, column, null_value, /*null_bytemap=*/ nullptr, nulls);
}
2021-06-22 16:21:23 +00:00
2021-08-10 11:31:15 +00:00
/// Logically negates every byte of `mask` (zero becomes 1, non-zero becomes 0)
/// and swaps the has_ones / has_zeros flags of `mask_info` accordingly.
void inverseMask(PaddedPODArray<UInt8> & mask, MaskInfo & mask_info)
{
    std::transform(mask.begin(), mask.end(), mask.begin(), [](UInt8 byte) -> UInt8 { return !byte; });
    std::swap(mask_info.has_ones, mask_info.has_zeros);
}
2021-08-10 11:31:15 +00:00
/// Executes a short-circuit (lazy) argument only for the rows selected by `mask`
/// and replaces `column` with the result. Does nothing if `column` is not such an argument.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, const MaskInfo & mask_info)
{
    const auto * lazy_function = checkAndGetShortCircuitArgument(column.column);
    if (lazy_function == nullptr)
        return;

    /// If mask contains only zeros, we can just create
    /// an empty column with the execution result type.
    if (!mask_info.has_ones)
    {
        auto result_type = lazy_function->getResultType();
        auto empty_column = result_type->createColumn();
        column = {std::move(empty_column), result_type, ""};
        return;
    }

    /// Mask contains only ones: no filtering is needed, execute on all rows.
    if (!mask_info.has_zeros)
    {
        column = lazy_function->reduce();
        return;
    }

    /// Mask contains both ones and zeros: filter first, then execute on the remaining rows.
    auto filtered = lazy_function->filter(mask, -1);
    column = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
}
/// If `column` is a short-circuit (lazy) argument, replaces it with its executed result,
/// or, when `empty` is set, only replaces its data with an empty column of the result type.
/// Does nothing for any other column.
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
{
    const auto * lazy_function = checkAndGetShortCircuitArgument(column.column);
    if (lazy_function == nullptr)
        return;

    if (empty)
    {
        column.column = lazy_function->getResultType()->createColumn();
        return;
    }

    column = lazy_function->reduce();
}
int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments)
{
2021-05-14 14:07:24 +00:00
int last_short_circuit_argument_index = -1;
for (size_t i = 0; i != arguments.size(); ++i)
{
2021-06-10 17:41:33 +00:00
if (checkAndGetShortCircuitArgument(arguments[i].column))
last_short_circuit_argument_index = static_cast<int>(i);
}
2021-05-14 14:07:24 +00:00
return last_short_circuit_argument_index;
}
2021-08-10 11:31:15 +00:00
void copyMask(const PaddedPODArray<UInt8> & from, PaddedPODArray<UInt8> & to)
{
if (from.size() != to.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot copy mask, because source and destination have different size");
2021-08-10 11:31:15 +00:00
if (from.empty())
return;
memcpy(to.data(), from.data(), from.size() * sizeof(*from.data()));
}
2021-04-22 15:14:58 +00:00
}