2021-04-22 15:14:58 +00:00
|
|
|
#include <Columns/MaskOperations.h>
|
|
|
|
#include <Columns/ColumnFunction.h>
|
|
|
|
#include <Columns/ColumnNullable.h>
|
|
|
|
#include <Columns/ColumnNothing.h>
|
2021-04-27 12:49:58 +00:00
|
|
|
#include <Columns/ColumnsCommon.h>
|
2021-06-07 10:55:55 +00:00
|
|
|
#include <algorithm>
|
|
|
|
|
2021-04-22 15:14:58 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes thrown by the functions in this file. They are only declared here.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
2021-04-27 12:49:58 +00:00
|
|
|
template <typename T>
|
2021-06-07 10:55:55 +00:00
|
|
|
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted)
|
2021-04-22 15:14:58 +00:00
|
|
|
{
|
2021-04-27 12:49:58 +00:00
|
|
|
if (mask.size() < data.size())
|
|
|
|
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
int from = data.size() - 1;
|
|
|
|
int index = mask.size() - 1;
|
|
|
|
data.resize(mask.size());
|
|
|
|
while (index >= 0)
|
2021-04-22 15:14:58 +00:00
|
|
|
{
|
2021-06-07 10:55:55 +00:00
|
|
|
if (mask[index] ^ inverted)
|
2021-04-22 15:14:58 +00:00
|
|
|
{
|
2021-04-27 12:49:58 +00:00
|
|
|
if (from < 0)
|
|
|
|
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
|
2021-04-22 15:14:58 +00:00
|
|
|
|
2021-04-27 12:49:58 +00:00
|
|
|
data[index] = data[from];
|
|
|
|
--from;
|
2021-04-22 15:14:58 +00:00
|
|
|
}
|
2021-04-27 12:49:58 +00:00
|
|
|
|
|
|
|
--index;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (from != -1)
|
|
|
|
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Explicit instantiations - not to place the implementation of the function above in the header file.
|
|
|
|
#define INSTANTIATE(TYPE) \
|
2021-05-18 13:05:55 +00:00
|
|
|
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool);
|
2021-04-27 12:49:58 +00:00
|
|
|
|
|
|
|
INSTANTIATE(UInt8)
|
|
|
|
INSTANTIATE(UInt16)
|
|
|
|
INSTANTIATE(UInt32)
|
|
|
|
INSTANTIATE(UInt64)
|
|
|
|
INSTANTIATE(UInt128)
|
|
|
|
INSTANTIATE(UInt256)
|
|
|
|
INSTANTIATE(Int8)
|
|
|
|
INSTANTIATE(Int16)
|
|
|
|
INSTANTIATE(Int32)
|
|
|
|
INSTANTIATE(Int64)
|
|
|
|
INSTANTIATE(Int128)
|
|
|
|
INSTANTIATE(Int256)
|
|
|
|
INSTANTIATE(Float32)
|
|
|
|
INSTANTIATE(Float64)
|
|
|
|
INSTANTIATE(Decimal32)
|
|
|
|
INSTANTIATE(Decimal64)
|
|
|
|
INSTANTIATE(Decimal128)
|
|
|
|
INSTANTIATE(Decimal256)
|
|
|
|
INSTANTIATE(DateTime64)
|
|
|
|
INSTANTIATE(char *)
|
2021-05-14 14:07:24 +00:00
|
|
|
INSTANTIATE(UUID)
|
2021-04-27 12:49:58 +00:00
|
|
|
|
|
|
|
#undef INSTANTIATE
|
|
|
|
|
2021-06-07 10:55:55 +00:00
|
|
|
/// Expands `offsets` in place to the size of `mask`.
///
/// A mask position is "selected" when its byte is non-zero, or, if `inverted` is true, when it is zero.
/// The original offsets are moved, in their original order, to the selected positions.
/// Every position that is not selected receives the offset of the nearest preceding
/// original element, or 0 if there is no such element.
///
/// Throws LOGICAL_ERROR if the mask is shorter than the offsets, or if the number of
/// selected positions differs from the original size of `offsets`.
void expandOffsetsByMask(PaddedPODArray<UInt64> & offsets, const PaddedPODArray<UInt8> & mask, bool inverted)
{
    if (mask.size() < offsets.size())
        throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);

    /// Both counters hold the number of elements that are still to be processed.
    /// This keeps them unsigned, so sizes are never narrowed to int.
    size_t from = offsets.size();
    size_t index = mask.size();
    offsets.resize(mask.size());

    /// Offset of the last original element that has not been moved yet.
    /// There is no element before the first one, so use 0 instead of reading offsets[-1].
    UInt64 prev_offset = from ? offsets[from - 1] : 0;

    while (index > 0)
    {
        --index;

        /// Convert the mask byte to bool before applying the inversion,
        /// so that any non-zero byte is treated as "set" in both modes.
        const bool selected = (mask[index] != 0) != inverted;
        if (selected)
        {
            if (from == 0)
                throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

            --from;
            offsets[index] = offsets[from];
            prev_offset = from ? offsets[from - 1] : 0;
        }
        else
        {
            offsets[index] = prev_offset;
        }
    }

    if (from != 0)
        throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
|
|
|
|
|
2021-06-07 10:55:55 +00:00
|
|
|
/// Fills `res` with a byte mask computed from `column` and reports which values occur in it.
///
/// A Nullable column is unwrapped first: the function calls itself for the nested column
/// and passes the column's own null map as `null_bytemap`.
///
/// For every row the value is chosen as follows:
///  - if `mask_used_in_expanding` is given and the row is not selected by it
///    (zero byte, or non-zero byte when `inverted_mask_used_in_expanding` is true),
///    the value is `default_value_in_expanding`;
///  - otherwise, if the column is only-NULL or `null_bytemap` is set for the row,
///    the value is `null_value`, and the row is marked with 1 in `nulls` when `nulls` is given;
///  - otherwise the value is column->getBool() for the row.
/// If `inverted` is true, the chosen value is negated before it is stored.
///
/// When `mask_used_in_expanding` is given and its size differs from the column size, `res` gets
/// the size of that mask and a column row is consumed only for rows that do not take the default value.
///
/// Returns MaskInfo with `has_once` set if a true value was stored and `has_zeros` set if a false one was.
/// Any exception thrown while the values are being read is replaced with ILLEGAL_TYPE_OF_ARGUMENT.
MaskInfo getMaskFromColumn(
    const ColumnPtr & column,
    PaddedPODArray<UInt8> & res,
    bool inverted,
    const PaddedPODArray<UInt8> * mask_used_in_expanding,
    UInt8 default_value_in_expanding,
    bool inverted_mask_used_in_expanding,
    UInt8 null_value,
    const PaddedPODArray<UInt8> * null_bytemap,
    PaddedPODArray<UInt8> * nulls)
{
    if (const auto * nullable = checkAndGetColumn<ColumnNullable>(*column))
    {
        const PaddedPODArray<UInt8> & nested_null_map = checkAndGetColumn<ColumnUInt8>(*nullable->getNullMapColumnPtr())->getData();
        return getMaskFromColumn(
            nullable->getNestedColumnPtr(),
            res,
            inverted,
            mask_used_in_expanding,
            default_value_in_expanding,
            inverted_mask_used_in_expanding,
            null_value,
            &nested_null_map,
            nulls);
    }

    /// The column is "full" when it has a value for every row of the resulting mask.
    const bool is_full_column = !mask_used_in_expanding || mask_used_in_expanding->size() == column->size();
    const size_t rows = is_full_column ? column->size() : mask_used_in_expanding->size();
    res.resize(rows);

    const bool only_null = column->onlyNull();

    /// Negates the value when the resulting mask is requested in inverted form.
    const auto apply_inversion = [inverted](bool raw) { return raw != inverted; };

    /// Some columns do not implement the getBool() method, so they cannot be
    /// converted to a mask; an exception is thrown in this case.
    try
    {
        MaskInfo info;
        size_t column_row = 0;
        for (size_t row = 0; row != rows; ++row)
        {
            const bool take_default
                = mask_used_in_expanding && (((*mask_used_in_expanding)[row] == 0) != inverted_mask_used_in_expanding);

            bool value;
            if (take_default)
            {
                value = apply_inversion(default_value_in_expanding);
            }
            else if (only_null || (null_bytemap && (*null_bytemap)[column_row]))
            {
                value = apply_inversion(null_value);
                if (nulls)
                    (*nulls)[row] = 1;
            }
            else
            {
                value = apply_inversion(column->getBool(column_row));
            }

            res[row] = value;

            if (value)
                info.has_once = true;
            else
                info.has_zeros = true;

            /// A column that is not full has no row for positions filled with the default value.
            if (is_full_column || !take_default)
                ++column_row;
        }

        return info;
    }
    catch (...)
    {
        throw Exception("Cannot convert column " + column->getName() + " to mask", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }
}
|
|
|
|
|
2021-06-07 10:55:55 +00:00
|
|
|
/// Stores the element-wise bitwise OR of `mask1` and `mask2` into `mask1`.
///
/// Returns MaskInfo with `has_once` set if the result contains a non-zero byte
/// and `has_zeros` set if it contains a zero byte.
/// Throws LOGICAL_ERROR if the masks have different sizes.
MaskInfo disjunctionMasks(PaddedPODArray<UInt8> & mask1, const PaddedPODArray<UInt8> & mask2)
{
    const size_t rows = mask1.size();
    if (rows != mask2.size())
        throw Exception("Cannot make a disjunction of masks, they have different sizes", ErrorCodes::LOGICAL_ERROR);

    MaskInfo info;
    for (size_t row = 0; row < rows; ++row)
    {
        const UInt8 merged = mask1[row] | mask2[row];
        mask1[row] = merged;

        if (merged)
            info.has_once = true;
        else
            info.has_zeros = true;
    }

    return info;
}
|
|
|
|
|
|
|
|
/// Replaces every byte of `mask` with its logical negation:
/// a non-zero byte becomes 0 and a zero byte becomes 1.
void inverseMask(PaddedPODArray<UInt8> & mask)
{
    for (UInt8 & byte : mask)
        byte = !byte;
}
|
|
|
|
|
2021-06-07 10:55:55 +00:00
|
|
|
/// If `column` holds a short-circuit argument (as reported by checkAndGetShortCircuitArgument),
/// replaces it with the result of executing that argument on the rows selected by `mask`.
/// Otherwise `column` is left untouched.
///
/// `mask_info` must describe `mask`; it is used to avoid unnecessary work:
///  - no selected rows: the result is an empty column of the result type with an empty name;
///  - some rows are skipped: the argument is filtered by the mask and then reduced;
///  - all rows are selected: the argument is reduced as is.
/// With `inverted` the roles of `has_once` and `has_zeros` are swapped and `inverted` is passed on to filter().
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, const MaskInfo & mask_info, bool inverted)
{
    const auto * column_function = checkAndGetShortCircuitArgument(column.column);
    if (!column_function)
        return;

    const bool has_selected_rows = inverted ? mask_info.has_zeros : mask_info.has_once;
    const bool has_skipped_rows = inverted ? mask_info.has_once : mask_info.has_zeros;

    ColumnWithTypeAndName result;
    if (!has_selected_rows)
    {
        /// Nothing has to be executed, an empty column of the result type is enough.
        auto result_type = column_function->getResultType();
        auto empty_column = result_type->createColumn();
        result = {std::move(empty_column), result_type, ""};
    }
    else if (has_skipped_rows)
    {
        /// Filtering is needed only when some rows are actually skipped.
        auto filtered = column_function->filter(mask, -1, inverted);
        result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
    }
    else
    {
        result = column_function->reduce();
    }

    column = std::move(result);
}
|
|
|
|
|
2021-06-07 10:55:55 +00:00
|
|
|
/// If `column` holds a short-circuit argument (as reported by checkAndGetShortCircuitArgument),
/// executes it; otherwise `column` is left untouched.
///
/// When `empty` is true nothing is executed: only `column.column` is replaced
/// with an empty column of the argument's result type.
/// When `empty` is false the whole `column` is replaced with the result of reduce().
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
{
    const auto * column_function = checkAndGetShortCircuitArgument(column.column);
    if (!column_function)
        return;

    if (empty)
    {
        column.column = column_function->getResultType()->createColumn();
        return;
    }

    column = column_function->reduce();
}
|
|
|
|
|
2021-05-14 14:07:24 +00:00
|
|
|
int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments)
|
2021-04-27 12:49:58 +00:00
|
|
|
{
|
2021-05-14 14:07:24 +00:00
|
|
|
int last_short_circuit_argument_index = -1;
|
|
|
|
for (size_t i = 0; i != arguments.size(); ++i)
|
2021-04-27 12:49:58 +00:00
|
|
|
{
|
2021-06-07 11:52:54 +00:00
|
|
|
if (const auto * column_function = checkAndGetShortCircuitArgument(arguments[i].column))
|
2021-05-14 14:07:24 +00:00
|
|
|
last_short_circuit_argument_index = i;
|
2021-04-27 12:49:58 +00:00
|
|
|
}
|
2021-05-14 14:07:24 +00:00
|
|
|
|
|
|
|
return last_short_circuit_argument_index;
|
2021-04-27 12:49:58 +00:00
|
|
|
}
|
|
|
|
|
2021-04-22 15:14:58 +00:00
|
|
|
}
|