ClickHouse/src/Columns/MaskOperations.cpp

242 lines
7.3 KiB
C++
Raw Normal View History

2021-04-22 15:14:58 +00:00
#include <Columns/MaskOperations.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnsCommon.h>
#include <algorithm>
#include <cstddef>
2021-04-22 15:14:58 +00:00
namespace DB
{
/// Error codes thrown by the functions in this file.
/// They are only declared here (`extern`); the definitions come from elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
}
/// Expands `data` in place so that it holds one element per byte of `mask`.
/// The existing elements keep their relative order and are moved to the positions
/// selected by the mask; every other position receives a default-constructed T.
///
/// @param data      Values to expand; resized to mask.size().
/// @param mask      Byte mask; any non-zero byte counts as set.
/// @param inverted  If true, positions whose mask byte is zero are the selected ones.
///
/// Throws LOGICAL_ERROR if the mask is shorter than the data, or if the number of
/// selected positions differs from the original number of elements.
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted)
{
    if (mask.size() < data.size())
        throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);

    /// Pointer-sized signed indices: `int` would be truncated for arrays with more than INT_MAX elements.
    std::ptrdiff_t from = static_cast<std::ptrdiff_t>(data.size()) - 1;
    std::ptrdiff_t index = static_cast<std::ptrdiff_t>(mask.size()) - 1;
    data.resize(mask.size());

    /// Walk backwards: `from` never exceeds `index` for a consistent mask,
    /// so each source element is read before its slot can be overwritten.
    while (index >= 0)
    {
        /// Compare as booleans; XOR-ing the raw byte with `inverted` would treat
        /// mask bytes other than 0 and 1 incorrectly.
        const bool selected = (mask[index] != 0) != inverted;
        if (selected)
        {
            if (from < 0)
                throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

            data[index] = data[from];
            --from;
        }
        else
        {
            /// Do not leave stale values (or whatever resize() left there) in skipped positions.
            data[index] = T();
        }

        --index;
    }

    if (from != -1)
        throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
/// Explicit instantiations of expandDataByMask for the element types listed below,
/// so that the template definition can stay in this file instead of the header.
#define INSTANTIATE_EXPAND_DATA_BY_MASK(ELEMENT) \
    template void expandDataByMask<ELEMENT>(PaddedPODArray<ELEMENT> &, const PaddedPODArray<UInt8> &, bool);

/// Unsigned integers.
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt8)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt16)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt32)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt64)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt128)
INSTANTIATE_EXPAND_DATA_BY_MASK(UInt256)

/// Signed integers.
INSTANTIATE_EXPAND_DATA_BY_MASK(Int8)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int16)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int32)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int64)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int128)
INSTANTIATE_EXPAND_DATA_BY_MASK(Int256)

/// Floating point.
INSTANTIATE_EXPAND_DATA_BY_MASK(Float32)
INSTANTIATE_EXPAND_DATA_BY_MASK(Float64)

/// Decimals and DateTime64.
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal32)
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal64)
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal128)
INSTANTIATE_EXPAND_DATA_BY_MASK(Decimal256)
INSTANTIATE_EXPAND_DATA_BY_MASK(DateTime64)

/// Other element types.
INSTANTIATE_EXPAND_DATA_BY_MASK(char *)
INSTANTIATE_EXPAND_DATA_BY_MASK(UUID)

#undef INSTANTIATE_EXPAND_DATA_BY_MASK
/// Expands an array of offsets in place so that it holds one offset per byte of `mask`.
/// Existing offsets keep their relative order and are moved to the positions selected
/// by the mask. Every other position repeats the offset of the nearest preceding
/// selected position (or 0 if there is none).
///
/// @param offsets   Offsets to expand; resized to mask.size().
/// @param mask      Byte mask; any non-zero byte counts as set.
/// @param inverted  If true, positions whose mask byte is zero are the selected ones.
///
/// Throws LOGICAL_ERROR if the mask is shorter than the offsets, or if the number of
/// selected positions differs from the original number of offsets.
void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<UInt8> & mask, bool inverted)
{
    if (mask.size() < offsets.size())
        throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);

    /// Pointer-sized signed indices: `int` would be truncated for arrays with more than INT_MAX elements.
    std::ptrdiff_t index = static_cast<std::ptrdiff_t>(mask.size()) - 1;
    std::ptrdiff_t from = static_cast<std::ptrdiff_t>(offsets.size()) - 1;
    offsets.resize(mask.size());

    /// Offset of the last source element not yet placed, or 0 once none remain.
    /// NOTE(review): the original read offsets[-1] in that case, presumably relying on
    /// zeroed left padding of the array - confirm that 0 matches it.
    UInt64 prev_offset = from >= 0 ? offsets[from] : 0;

    /// Walk backwards so that each source offset is read before its slot can be overwritten.
    while (index >= 0)
    {
        /// Compare as booleans; XOR-ing the raw byte with `inverted` would treat
        /// mask bytes other than 0 and 1 incorrectly.
        const bool selected = (mask[index] != 0) != inverted;
        if (selected)
        {
            if (from < 0)
                throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

            offsets[index] = offsets[from];
            --from;
            prev_offset = from >= 0 ? offsets[from] : 0;
        }
        else
        {
            /// A skipped position repeats the preceding offset.
            offsets[index] = prev_offset;
        }

        --index;
    }

    if (from != -1)
        throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
/// Converts `column` into a byte mask stored in `res` and reports which values the mask contains.
///
/// @param column                          Source column. A Nullable column is unwrapped and its null map
///                                        is passed on as `null_bytemap`.
/// @param res                             Output mask; resized to the number of produced rows.
/// @param inverted                        If true, every produced value is logically negated.
/// @param mask_used_in_expanding          Optional mask. If its size differs from the column size, the result
///                                        has the size of this mask and column rows are consumed only at
///                                        positions that do not take the default value.
/// @param default_value_in_expanding      Value used at positions where `mask_used_in_expanding` is zero
///                                        (or non-zero, if `inverted_mask_used_in_expanding` is true).
/// @param inverted_mask_used_in_expanding See `default_value_in_expanding`.
/// @param null_value                      Value used for rows that are NULL.
/// @param null_bytemap                    Optional null map, indexed by the column row.
/// @param nulls                           Optional output; set to 1 at result rows that were NULL.
///                                        It is not resized here.
/// @return MaskInfo with `has_once` set if any produced value is true and `has_zeros` set if any is false.
///
/// Throws ILLEGAL_TYPE_OF_ARGUMENT if the conversion loop throws for any reason.
MaskInfo getMaskFromColumn(
    const ColumnPtr & column,
    PaddedPODArray<UInt8> & res,
    bool inverted,
    const PaddedPODArray<UInt8> * mask_used_in_expanding,
    UInt8 default_value_in_expanding,
    bool inverted_mask_used_in_expanding,
    UInt8 null_value,
    const PaddedPODArray<UInt8> * null_bytemap,
    PaddedPODArray<UInt8> * nulls)
{
    /// Nullable column: recurse into the nested column, handing over its null map.
    if (const auto * nullable = checkAndGetColumn<ColumnNullable>(*column))
    {
        const PaddedPODArray<UInt8> & nested_null_map = checkAndGetColumn<ColumnUInt8>(*nullable->getNullMapColumnPtr())->getData();
        return getMaskFromColumn(
            nullable->getNestedColumnPtr(),
            res,
            inverted,
            mask_used_in_expanding,
            default_value_in_expanding,
            inverted_mask_used_in_expanding,
            null_value,
            &nested_null_map,
            nulls);
    }

    /// The column is "full" when it already has one row per result row.
    const bool column_is_full = !(mask_used_in_expanding && mask_used_in_expanding->size() != column->size());
    const size_t rows = column_is_full ? column->size() : mask_used_in_expanding->size();
    res.resize(rows);

    const bool column_is_only_null = column->onlyNull();

    /// Some columns doesn't implement getBool() method and we cannot
    /// convert them to mask, throw an exception in this case.
    try
    {
        MaskInfo info;
        size_t source_row = 0;
        for (size_t row = 0; row != rows; ++row)
        {
            const bool take_default = mask_used_in_expanding
                && (((*mask_used_in_expanding)[row] == 0) != inverted_mask_used_in_expanding);

            bool value;
            if (take_default)
            {
                value = (default_value_in_expanding != 0) != inverted;
            }
            else if (column_is_only_null || (null_bytemap && (*null_bytemap)[source_row]))
            {
                value = (null_value != 0) != inverted;
                if (nulls)
                    (*nulls)[row] = 1;
            }
            else
            {
                value = column->getBool(source_row) != inverted;
            }

            if (value)
                info.has_once = true;
            else
                info.has_zeros = true;

            /// A non-full column has no row for positions filled with the default value.
            if (column_is_full || !take_default)
                ++source_row;

            res[row] = value;
        }

        return info;
    }
    catch (...)
    {
        throw Exception("Cannot convert column " + column.get()->getName() + " to mask", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }
}
/// ORs `mask2` into `mask1` element by element (the result is stored in `mask1`).
/// Returns a MaskInfo with `has_once` set if any resulting byte is non-zero
/// and `has_zeros` set if any resulting byte is zero.
/// Throws LOGICAL_ERROR if the masks have different sizes.
MaskInfo disjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2)
{
    const size_t rows = mask1.size();
    if (rows != mask2.size())
        throw Exception("Cannot make a disjunction of masks, they have different sizes", ErrorCodes::LOGICAL_ERROR);

    MaskInfo info;
    for (size_t row = 0; row < rows; ++row)
    {
        const UInt8 merged = mask1[row] | mask2[row];
        mask1[row] = merged;

        if (merged)
            info.has_once = true;
        else
            info.has_zeros = true;
    }

    return info;
}
/// Logically negates every byte of `mask` in place: zero becomes 1, any non-zero byte becomes 0.
void inverseMask(PaddedPODArray<UInt8> & mask)
{
    for (auto & byte : mask)
        byte = !byte;
}
/// If `column` holds a short-circuit argument (as reported by checkAndGetShortCircuitArgument),
/// replaces it with the result of reduce(), restricted to the rows selected by `mask`.
/// Columns that are not short-circuit arguments are left untouched.
///
/// @param column     Column to replace in place.
/// @param mask       Byte mask passed to filter().
/// @param mask_info  Says whether the mask contains ones and/or zeros.
/// @param inverted   If true, the roles of ones and zeros in the mask are swapped.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, const MaskInfo & mask_info, bool inverted)
{
    const auto * short_circuit_arg = checkAndGetShortCircuitArgument(column.column);
    if (!short_circuit_arg)
        return;

    /// Express the mask content in terms of selected/skipped rows, taking inversion into account.
    const bool has_selected_rows = inverted ? mask_info.has_zeros : mask_info.has_once;
    const bool has_skipped_rows = inverted ? mask_info.has_once : mask_info.has_zeros;

    ColumnWithTypeAndName executed;
    if (!has_selected_rows)
    {
        /// Nothing is selected: an empty column of the result type is enough.
        auto type = short_circuit_arg->getResultType();
        executed = {type->createColumn(), type, ""};
    }
    else if (has_skipped_rows)
    {
        /// Only part of the rows is selected: filter first, then reduce the filtered column.
        auto kept = short_circuit_arg->filter(mask, -1, inverted);
        executed = typeid_cast<const ColumnFunction *>(kept.get())->reduce();
    }
    else
    {
        /// Every row is selected: no filtering is needed.
        executed = short_circuit_arg->reduce();
    }

    column = std::move(executed);
}
/// If `column` holds a short-circuit argument (as reported by checkAndGetShortCircuitArgument),
/// replaces it in place; other columns are left untouched.
/// With `empty` set, only `column.column` is replaced, by an empty column of the result type.
/// Otherwise the whole `column` is replaced by the result of reduce().
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
{
    const auto * short_circuit_arg = checkAndGetShortCircuitArgument(column.column);
    if (!short_circuit_arg)
        return;

    if (empty)
    {
        column.column = short_circuit_arg->getResultType()->createColumn();
        return;
    }

    column = short_circuit_arg->reduce();
}
2021-05-14 14:07:24 +00:00
int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments)
{
2021-05-14 14:07:24 +00:00
int last_short_circuit_argument_index = -1;
for (size_t i = 0; i != arguments.size(); ++i)
{
2021-06-07 11:52:54 +00:00
if (const auto * column_function = checkAndGetShortCircuitArgument(arguments[i].column))
2021-05-14 14:07:24 +00:00
last_short_circuit_argument_index = i;
}
2021-05-14 14:07:24 +00:00
return last_short_circuit_argument_index;
}
2021-04-22 15:14:58 +00:00
}