mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-15 19:02:04 +00:00
1071 lines
39 KiB
C++
1071 lines
39 KiB
C++
#pragma once
|
|
#include <Functions/IFunction.h>
|
|
#include <Functions/FunctionFactory.h>
|
|
#include <Functions/FunctionHelpers.h>
|
|
#include <DataTypes/DataTypeArray.h>
|
|
#include <DataTypes/DataTypeMap.h>
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
#include <DataTypes/getLeastSupertype.h>
|
|
#include <Columns/ColumnArray.h>
|
|
#include <Columns/ColumnMap.h>
|
|
#include <Columns/ColumnString.h>
|
|
#include <Columns/ColumnFixedString.h>
|
|
#include <Columns/ColumnsNumber.h>
|
|
#include <Columns/ColumnNullable.h>
|
|
#include <Common/FieldVisitorsAccurateComparison.h>
|
|
#include <Common/memcmpSmall.h>
|
|
#include <Common/assert_cast.h>
|
|
#include <Columns/ColumnLowCardinality.h>
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
|
#include <Interpreters/castColumn.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
{
|
|
extern const int ILLEGAL_COLUMN;
|
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
|
}
|
|
|
|
using NullMap = PaddedPODArray<UInt8>;
|
|
|
|
/// ConcreteActions -- what to do when the index was found.
|
|
|
|
struct HasAction
|
|
{
|
|
using ResultType = UInt8;
|
|
static constexpr const bool resume_execution = false;
|
|
static constexpr void apply(ResultType& current, size_t) noexcept { current = 1; }
|
|
};
|
|
|
|
/// The index is returned starting from 1.
|
|
struct IndexOfAction
|
|
{
|
|
using ResultType = UInt64;
|
|
static constexpr const bool resume_execution = false;
|
|
static constexpr void apply(ResultType& current, size_t j) noexcept { current = j + 1; }
|
|
};
|
|
|
|
struct CountEqualAction
|
|
{
|
|
using ResultType = UInt64;
|
|
static constexpr const bool resume_execution = true;
|
|
static constexpr void apply(ResultType & current, size_t) noexcept { ++current; }
|
|
};
|
|
|
|
/// How to perform the search depending on the arguments data types.
|
|
namespace Impl
|
|
{
|
|
template <
|
|
typename ConcreteAction,
|
|
bool RightArgIsConstant = false,
|
|
typename IntegralInitial = UInt64,
|
|
typename IntegralResult = UInt64>
|
|
struct Main
|
|
{
|
|
private:
|
|
using Initial = IntegralInitial;
|
|
using Result = IntegralResult;
|
|
|
|
using ResultType = typename ConcreteAction::ResultType;
|
|
using ResultArr = PaddedPODArray<ResultType>;
|
|
|
|
using ArrOffset = ColumnArray::Offset;
|
|
using ArrOffsets = ColumnArray::Offsets;
|
|
|
|
#pragma clang diagnostic push
|
|
#pragma clang diagnostic ignored "-Wsign-compare"
|
|
|
|
static constexpr bool compare(const Initial & left, const PaddedPODArray<Result> & right, size_t, size_t i) noexcept
|
|
{
|
|
return left == right[i];
|
|
}
|
|
|
|
static constexpr bool compare(const PaddedPODArray<Initial> & left, const Result & right, size_t i, size_t) noexcept
|
|
{
|
|
return left[i] == right;
|
|
}
|
|
|
|
static constexpr bool compare(
|
|
const PaddedPODArray<Initial> & left, const PaddedPODArray<Result> & right, size_t i, size_t j) noexcept
|
|
{
|
|
return left[i] == right[j];
|
|
}
|
|
|
|
/// LowCardinality
|
|
static bool compare(const IColumn & left, const Result & right, size_t i, size_t)
|
|
{
|
|
return left.getUInt(i) == right;
|
|
}
|
|
|
|
/// Generic
|
|
static bool compare(const IColumn & left, const IColumn & right, size_t i, size_t j)
|
|
{
|
|
return 0 == left.compareAt(i, RightArgIsConstant ? 0 : j, right, 1);
|
|
}
|
|
|
|
#pragma clang diagnostic pop
|
|
|
|
static constexpr bool hasNull(const NullMap * const null_map, size_t i) noexcept { return (*null_map)[i]; }
|
|
|
|
template <size_t Case, typename Data, typename Target>
|
|
static void process(
|
|
const Data & data, const ArrOffsets & offsets, const Target & target, ResultArr & result,
|
|
[[maybe_unused]] const NullMap * const null_map_data,
|
|
[[maybe_unused]] const NullMap * const null_map_item)
|
|
{
|
|
if constexpr (std::is_same_v<Data, IColumn> && std::is_same_v<Target, IColumn>)
|
|
{
|
|
/// Generic variant is using IColumn::compare function that only allows to compare columns of identical types.
|
|
if (typeid(data) != typeid(target))
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Columns {} and {} cannot be compared", data.getName(), target.getName());
|
|
}
|
|
|
|
const size_t size = offsets.size();
|
|
|
|
result.resize(size);
|
|
|
|
ArrOffset current_offset = 0;
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
{
|
|
const size_t array_size = offsets[i] - current_offset;
|
|
ResultType current = 0;
|
|
|
|
for (size_t j = 0; j < array_size; ++j)
|
|
{
|
|
if constexpr (Case == 2) /// Right arg is Nullable
|
|
if (hasNull(null_map_item, i))
|
|
continue;
|
|
|
|
if constexpr (Case == 3) /// Left arg is an array of Nullables
|
|
if (hasNull(null_map_data, current_offset + j))
|
|
continue;
|
|
|
|
if constexpr (Case == 4) /// Both args are nullable
|
|
{
|
|
const bool right_is_null = hasNull(null_map_data, current_offset + j);
|
|
const bool left_is_null = hasNull(null_map_item, i);
|
|
|
|
if (right_is_null != left_is_null)
|
|
continue;
|
|
|
|
if (!right_is_null && !compare(data, target, current_offset + j, i))
|
|
continue;
|
|
}
|
|
else if (!compare(data, target, current_offset + j, i))
|
|
continue;
|
|
|
|
ConcreteAction::apply(current, j);
|
|
|
|
if constexpr (!ConcreteAction::resume_execution)
|
|
break;
|
|
}
|
|
|
|
result[i] = current;
|
|
current_offset = offsets[i];
|
|
}
|
|
}
|
|
|
|
public:
|
|
template <typename Data, typename Target>
|
|
static void vector(
|
|
const Data & data,
|
|
const ArrOffsets & offsets,
|
|
const Target & value,
|
|
ResultArr & result,
|
|
const NullMap * const null_map_data,
|
|
const NullMap * const null_map_item)
|
|
{
|
|
if (!null_map_data && !null_map_item)
|
|
process<1>(data, offsets, value, result, null_map_data, null_map_item);
|
|
else if (!null_map_data && null_map_item)
|
|
process<2>(data, offsets, value, result, null_map_data, null_map_item);
|
|
else if (null_map_data && !null_map_item)
|
|
process<3>(data, offsets, value, result, null_map_data, null_map_item);
|
|
else
|
|
process<4>(data, offsets, value, result, null_map_data, null_map_item);
|
|
}
|
|
};
|
|
|
|
/// When the 2nd function argument is a NULL value.
|
|
template <typename ConcreteAction>
|
|
struct Null
|
|
{
|
|
using ResultType = typename ConcreteAction::ResultType;
|
|
|
|
static void process(
|
|
const ColumnArray::Offsets & offsets,
|
|
PaddedPODArray<ResultType> & result,
|
|
[[maybe_unused]] const NullMap * null_map_data)
|
|
{
|
|
const size_t size = offsets.size();
|
|
|
|
if (!null_map_data)
|
|
{
|
|
result.resize_fill(size);
|
|
return;
|
|
}
|
|
|
|
result.resize(size);
|
|
|
|
ColumnArray::Offset current_offset = 0;
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
{
|
|
ResultType current = 0;
|
|
const size_t array_size = offsets[i] - current_offset;
|
|
|
|
for (size_t j = 0; j < array_size; ++j)
|
|
{
|
|
if (!(*null_map_data)[current_offset + j])
|
|
continue;
|
|
|
|
ConcreteAction::apply(current, j);
|
|
|
|
if constexpr (!ConcreteAction::resume_execution)
|
|
break;
|
|
}
|
|
|
|
result[i] = current;
|
|
current_offset = offsets[i];
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename ConcreteAction>
|
|
struct String
|
|
{
|
|
private:
|
|
using Offset = ColumnString::Offset;
|
|
template <bool IsConst> using OffsetT = std::conditional_t<IsConst, Offset, const ColumnString::Offsets &>;
|
|
using ArrayOffset = ColumnArray::Offset;
|
|
using ResultType = typename ConcreteAction::ResultType;
|
|
|
|
template <bool IsConst, bool HasNullMapData, bool HasNullMapItem>
|
|
static void processImpl(
|
|
const ColumnString::Chars & data,
|
|
const ColumnArray::Offsets & offsets,
|
|
const ColumnString::Offsets & string_offsets,
|
|
const ColumnString::Chars & item_values,
|
|
OffsetT<IsConst> item_offsets,
|
|
PaddedPODArray<ResultType> & result,
|
|
[[maybe_unused]] const NullMap * data_map,
|
|
[[maybe_unused]] const NullMap * item_map)
|
|
{
|
|
const size_t size = offsets.size();
|
|
|
|
result.resize(size);
|
|
|
|
ArrayOffset current_offset = 0;
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
{
|
|
const ArrayOffset array_size = offsets[i] - current_offset;
|
|
|
|
[[maybe_unused]] Offset value_pos = 0;
|
|
[[maybe_unused]] Offset value_size = 0;
|
|
|
|
if constexpr (!IsConst) // workaround because ?: ternary operator is not constexpr
|
|
{
|
|
if (0 != i) value_pos = item_offsets[i - 1];
|
|
value_size = item_offsets[i] - value_pos;
|
|
}
|
|
|
|
ResultType current = 0;
|
|
|
|
for (size_t j = 0; j < array_size; ++j)
|
|
{
|
|
const ArrayOffset string_pos = current_offset + j == 0
|
|
? 0
|
|
: string_offsets[current_offset + j - 1];
|
|
|
|
const ArrayOffset string_size = string_offsets[current_offset + j] - string_pos - IsConst * 1;
|
|
|
|
if constexpr (IsConst)
|
|
{
|
|
if constexpr (HasNullMapData)
|
|
if ((*data_map)[current_offset + j])
|
|
continue;
|
|
|
|
if (!memequalSmallAllowOverflow15(item_values.data(), item_offsets, &data[string_pos], string_size))
|
|
continue;
|
|
}
|
|
else if constexpr (HasNullMapData)
|
|
{
|
|
if ((*data_map)[current_offset + j])
|
|
{
|
|
if constexpr (!HasNullMapItem)
|
|
continue;
|
|
|
|
if (!(*item_map)[i])
|
|
continue;
|
|
}
|
|
else if (!memequalSmallAllowOverflow15(&item_values[value_pos], value_size, &data[string_pos], string_size))
|
|
continue;
|
|
}
|
|
else if (!memequalSmallAllowOverflow15(&item_values[value_pos], value_size, &data[string_pos], string_size))
|
|
continue;
|
|
|
|
ConcreteAction::apply(current, j);
|
|
|
|
if constexpr (!ConcreteAction::resume_execution)
|
|
break;
|
|
}
|
|
|
|
result[i] = current;
|
|
current_offset = offsets[i];
|
|
}
|
|
}
|
|
|
|
template <bool IsConst>
|
|
static inline void invokeCheckNullMaps(
|
|
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets,
|
|
const ColumnString::Offsets & str_offsets, const ColumnString::Chars & values,
|
|
OffsetT<IsConst> item_offsets,
|
|
PaddedPODArray<ResultType> & result, const NullMap * data_map, const NullMap * item_map)
|
|
{
|
|
if (data_map && item_map)
|
|
processImpl<IsConst, true, true>(data, offsets, str_offsets, values, item_offsets, result, data_map, item_map);
|
|
else if (data_map)
|
|
processImpl<IsConst, true, false>(data, offsets, str_offsets, values, item_offsets, result, data_map, item_map);
|
|
else if (item_map)
|
|
processImpl<IsConst, false, true>(data, offsets, str_offsets, values, item_offsets, result, data_map, item_map);
|
|
else
|
|
processImpl<IsConst, false, false>(data, offsets, str_offsets, values, item_offsets, result, data_map, item_map);
|
|
}
|
|
|
|
public:
|
|
static inline void process(
|
|
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets,
|
|
const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values,
|
|
Offset item_offsets, PaddedPODArray<ResultType> & result,
|
|
const NullMap * data_map, const NullMap * item_map)
|
|
{
|
|
invokeCheckNullMaps<true>(data, offsets, string_offsets, item_values, item_offsets, result, data_map, item_map);
|
|
}
|
|
|
|
static inline void process(
|
|
const ColumnString::Chars & data, const ColumnArray::Offsets & offsets,
|
|
const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values,
|
|
const ColumnString::Offsets & item_offsets, PaddedPODArray<ResultType> & result,
|
|
const NullMap * data_map, const NullMap * item_map)
|
|
{
|
|
invokeCheckNullMaps<false>(data, offsets, string_offsets, item_values, item_offsets, result, data_map, item_map);
|
|
}
|
|
};
|
|
}
|
|
|
|
template <typename ConcreteAction, typename Name>
|
|
class FunctionArrayIndex : public IFunction
|
|
{
|
|
public:
|
|
static constexpr auto name = Name::name;
|
|
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayIndex>(); }
|
|
|
|
/// Get function name.
|
|
String getName() const override { return name; }
|
|
|
|
bool useDefaultImplementationForNulls() const override { return false; }
|
|
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
|
|
|
size_t getNumberOfArguments() const override { return 2; }
|
|
|
|
/** Validates the argument types and returns the numeric result type of the action.
  *
  * The first argument must be an array; for the `has` action (HasAction) a map is accepted too, in which
  * case its key type plays the role of the element type. Unless the second argument is of an only-NULL type,
  * it has to pass allowArguments() together with the element type.
  *
  * Throws ILLEGAL_TYPE_OF_ARGUMENT when either check fails.
  */
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
    const DataTypePtr & haystack_type = arguments[0].type;
    const DataTypePtr & needle_type = arguments[1].type;

    const auto * as_array = checkAndGetDataType<DataTypeArray>(haystack_type.get());
    const auto * as_map = checkAndGetDataType<DataTypeMap>(haystack_type.get());

    DataTypePtr element_type;

    /// If map is first argument only has(map_column, key) function is supported
    if constexpr (std::is_same_v<ConcreteAction, HasAction>)
    {
        if (as_map)
            element_type = as_map->getKeyType();
        else if (as_array)
            element_type = as_array->getNestedType();
        else
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "First argument for function {} must be an array or map. Actual {}",
                getName(),
                haystack_type->getName());
    }
    else
    {
        if (!as_array)
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "First argument for function {} must be an array. Actual {}",
                getName(),
                haystack_type->getName());

        element_type = as_array->getNestedType();
    }

    const bool needle_is_acceptable = needle_type->onlyNull() || allowArguments(element_type, needle_type);

    if (!needle_is_acceptable)
    {
        const char * first_argument_type_name = as_map ? "map" : "array";
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Types of {} and 2nd argument of function `{}` must be identical up to nullability, cardinality, "
            "numeric types, or Enum and numeric type. Passed: {} and {}.",
            first_argument_type_name,
            getName(),
            haystack_type->getName(),
            needle_type->getName());
    }

    return std::make_shared<DataTypeNumber<ResultType>>();
}
|
|
|
|
/** Runs the function. For the `has` action with a map as the first argument, the map is first replaced by an
  * array built from its keys and its offsets (with the type Array(key type)); everything else goes straight
  * to executeArrayImpl().
  */
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
    if constexpr (std::is_same_v<ConcreteAction, HasAction>)
    {
        if (isMap(arguments[0].type))
        {
            /// A constant map is materialized before its nested columns are taken apart.
            const auto full_map_holder = arguments[0].column->convertToFullColumnIfConst();
            const auto & map_column = assert_cast<const ColumnMap &>(*full_map_holder);

            auto key_offsets = map_column.getNestedColumn().getOffsetsPtr();
            auto keys = map_column.getNestedData().getColumnPtr(0);

            const auto & map_type = assert_cast<const DataTypeMap &>(*arguments[0].type);

            /// The copy keeps the name of the first argument and the whole second argument unchanged.
            ColumnsWithTypeAndName array_arguments = arguments;
            array_arguments[0].column = ColumnArray::create(keys, key_offsets);
            array_arguments[0].type = std::make_shared<DataTypeArray>(map_type.getKeyType());

            return executeArrayImpl(array_arguments, result_type);
        }
    }

    return executeArrayImpl(arguments, result_type);
}
|
|
|
|
private:
|
|
using ResultType = typename ConcreteAction::ResultType;
|
|
using ResultColumnType = ColumnVector<ResultType>;
|
|
using ResultColumnPtr = decltype(ResultColumnType::create());
|
|
|
|
using NullMaps = std::pair<const NullMap *, const NullMap *>;
|
|
|
|
struct ExecutionData
|
|
{
|
|
const IColumn& left;
|
|
const IColumn& right;
|
|
const ColumnArray::Offsets& offsets;
|
|
ColumnPtr result_column;
|
|
NullMaps maps;
|
|
ResultColumnPtr result { ResultColumnType::create() };
|
|
|
|
inline void moveResult() { result_column = std::move(result); }
|
|
};
|
|
|
|
static inline bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg)
|
|
{
|
|
auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type));
|
|
auto arg_decayed = removeNullable(removeLowCardinality(arg));
|
|
|
|
return ((isNativeNumber(inner_type_decayed) || isEnum(inner_type_decayed)) && isNativeNumber(arg_decayed))
|
|
|| getLeastSupertype(DataTypes{inner_type_decayed, arg_decayed});
|
|
}
|
|
|
|
/**
|
|
* If one or both arguments passed to this function are nullable,
|
|
* we create a new column that contains non-nullable arguments:
|
|
*
|
|
* - if the 1st argument is a non-constant array of nullable values,
|
|
* it is turned into a non-constant array of ordinary values + a null
|
|
* byte map;
|
|
* - if the 2nd argument is a nullable value, it is turned into an
|
|
* ordinary value + a null byte map.
|
|
*
|
|
* Note that since constant arrays have quite a specific structure
|
|
* (they are vectors of Fields, which may represent the NULL value),
|
|
* they do not require any preprocessing.
|
|
*/
|
|
ColumnPtr executeArrayImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
|
|
{
|
|
const ColumnPtr & ptr = arguments[0].column;
|
|
|
|
/**
|
|
* The columns here have two general cases, either being Array(T) or Const(Array(T)).
|
|
* The last type will return nullptr after casting to ColumnArray, so we leave the casting
|
|
* to execute* functions.
|
|
*/
|
|
const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(ptr.get());
|
|
const ColumnNullable * nullable = nullptr;
|
|
|
|
if (col_array)
|
|
nullable = checkAndGetColumn<ColumnNullable>(col_array->getData());
|
|
|
|
const auto & arg_column = arguments[1].column;
|
|
const ColumnNullable * arg_nullable = checkAndGetColumn<ColumnNullable>(*arg_column);
|
|
|
|
if (!nullable && !arg_nullable)
|
|
{
|
|
return executeOnNonNullable(arguments, result_type);
|
|
}
|
|
else
|
|
{
|
|
/**
|
|
* To correctly process the Nullable values (either #col_array, #arg_column or both) we create a new columns
|
|
* and operate on it. The columns structure follows:
|
|
* {0, 1, 2, 3, 4}
|
|
* {data (array) argument, "value" argument, data null map, "value" null map, function result}.
|
|
*/
|
|
ColumnsWithTypeAndName source_columns(4);
|
|
|
|
if (nullable)
|
|
{
|
|
const auto & nested_col = nullable->getNestedColumnPtr();
|
|
|
|
auto & data = source_columns[0];
|
|
|
|
data.column = ColumnArray::create(nested_col, col_array->getOffsetsPtr());
|
|
data.type = std::make_shared<DataTypeArray>(
|
|
static_cast<const DataTypeNullable &>(
|
|
*static_cast<const DataTypeArray &>(
|
|
*arguments[0].type
|
|
).getNestedType()
|
|
).getNestedType());
|
|
|
|
auto & null_map = source_columns[2];
|
|
|
|
null_map.column = nullable->getNullMapColumnPtr();
|
|
null_map.type = std::make_shared<DataTypeUInt8>();
|
|
}
|
|
else
|
|
{
|
|
auto & data = source_columns[0];
|
|
data = arguments[0];
|
|
}
|
|
|
|
if (arg_nullable)
|
|
{
|
|
auto & arg = source_columns[1];
|
|
arg.column = arg_nullable->getNestedColumnPtr();
|
|
arg.type =
|
|
static_cast<const DataTypeNullable &>(
|
|
*arguments[1].type
|
|
).getNestedType();
|
|
|
|
auto & null_map = source_columns[3];
|
|
null_map.column = arg_nullable->getNullMapColumnPtr();
|
|
null_map.type = std::make_shared<DataTypeUInt8>();
|
|
}
|
|
else
|
|
{
|
|
auto & arg = source_columns[1];
|
|
arg = arguments[1];
|
|
}
|
|
|
|
/// Now perform the function.
|
|
return executeOnNonNullable(source_columns, result_type);
|
|
}
|
|
}
|
|
|
|
#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64
|
|
|
|
/** Tries the specialised implementations one after another and returns the first non-null result.
  * An array whose nested column is LowCardinality is handled by executeLowCardinality() only.
  * Throws ILLEGAL_COLUMN if no implementation accepted the arguments.
  */
ColumnPtr executeOnNonNullable(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
{
    if (const auto * left_arr = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
        left_arr && checkAndGetColumn<ColumnLowCardinality>(&left_arr->getData()))
    {
        if (auto res = executeLowCardinality(arguments))
            return res;

        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName());
    }

    /// The order of the attempts is significant and is kept as: integral, const, string, generic.
    if (ColumnPtr res = executeIntegral<INTEGRAL_TPL_PACK>(arguments))
        return res;

    if (ColumnPtr res = executeConst(arguments, result_type))
        return res;

    if (ColumnPtr res = executeString(arguments))
        return res;

    if (ColumnPtr res = executeGeneric(arguments))
        return res;

    throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName());
}
|
|
|
|
/**
|
|
* The Array's internal data type may be quite tricky (containing a Nullable type somewhere). To process the
|
|
* Nullable types correctly, for each data type specialisation we provide two null maps (one for the data and one
|
|
* for the items). By convention they are passed as the third and the fourth argument, respectively
|
|
* (counting from 1).
|
|
*
|
|
* @return {nullptr, nullptr} if there are less than 3 arguments.
|
|
* @return {null_map_data, nullptr} if there are three arguments
|
|
* @return {nullptr, null_map_item} if there are four arguments but the third is missing.
|
|
* @return {null_map_data, null_map_item} if there are four arguments.
|
|
*/
|
|
static NullMaps getNullMaps(const ColumnsWithTypeAndName & arguments) noexcept
|
|
{
|
|
if (arguments.size() < 3)
|
|
return {nullptr, nullptr};
|
|
|
|
const NullMap * null_map_data = nullptr;
|
|
const NullMap * null_map_item = nullptr;
|
|
|
|
if (const auto & data_map = arguments[2].column; data_map)
|
|
null_map_data = &assert_cast<const ColumnUInt8 &>(*data_map).getData();
|
|
|
|
if (const auto & item_map = arguments[3].column; item_map)
|
|
null_map_item = &assert_cast<const ColumnUInt8 &>(*item_map).getData();
|
|
|
|
return {null_map_data, null_map_item};
|
|
}
|
|
|
|
/**
|
|
* Given a variadic pack #Integral, apply executeIntegralExpanded with such parameters:
|
|
* Integral s = {s1, s2, ...}
|
|
* (s1, s1, s2, ...), (s2, s1, s2, ...), (s3, s1, s2, ...)
|
|
*/
|
|
template <typename... Integral>
|
|
static inline ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments)
|
|
{
|
|
const ColumnArray * const left = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
|
|
|
|
if (!left)
|
|
return nullptr;
|
|
|
|
const ColumnPtr right_converted_ptr = arguments[1].column->convertToFullColumnIfLowCardinality();
|
|
const IColumn& right = *right_converted_ptr.get();
|
|
|
|
ExecutionData data = {
|
|
left->getData(),
|
|
right,
|
|
left->getOffsets(),
|
|
nullptr,
|
|
getNullMaps(arguments)
|
|
};
|
|
|
|
if (executeIntegral<Integral...>(data))
|
|
return data.result_column;
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
template <typename... Integral>
|
|
static inline bool executeIntegral(ExecutionData& data)
|
|
{
|
|
return (executeIntegralExpanded<Integral, Integral...>(data) || ...);
|
|
}
|
|
|
|
/// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ...
|
|
template <typename A, typename... Other>
|
|
static inline bool executeIntegralExpanded(ExecutionData& data)
|
|
{
|
|
return (executeIntegralImpl<A, Other>(data) || ...);
|
|
}
|
|
|
|
/**
|
|
* The internal data type of the first argument (target array), if it's integral, like UInt8, may differ from the
|
|
* second argument, namely, the @e value, so it's possible to invoke the <tt>has(Array(Int8), UInt64)</tt> e.g.
|
|
* so we have to check all possible variants for #Initial and #Resulting types.
|
|
*/
|
|
template <typename Initial, typename Resulting>
|
|
static bool executeIntegralImpl(ExecutionData& data)
|
|
{
|
|
const ColumnVector<Initial> * col_nested = checkAndGetColumn<ColumnVector<Initial>>(&data.left);
|
|
|
|
if (!col_nested)
|
|
return false;
|
|
|
|
const auto [null_map_data, null_map_item] = data.maps;
|
|
|
|
if (data.right.onlyNull())
|
|
Impl::Null<ConcreteAction>::process(
|
|
data.offsets,
|
|
data.result->getData(),
|
|
null_map_data);
|
|
else if (const auto item_arg_const = checkAndGetColumnConst<ColumnVector<Resulting>>(&data.right))
|
|
Impl::Main<ConcreteAction, true, Initial, Resulting>::vector(
|
|
col_nested->getData(),
|
|
data.offsets,
|
|
item_arg_const->template getValue<Resulting>(),
|
|
data.result->getData(),
|
|
null_map_data,
|
|
nullptr);
|
|
else if (const auto item_arg_vector = checkAndGetColumn<ColumnVector<Resulting>>(&data.right))
|
|
Impl::Main<ConcreteAction, false, Initial, Resulting>::vector(
|
|
col_nested->getData(),
|
|
data.offsets,
|
|
item_arg_vector->getData(),
|
|
data.result->getData(),
|
|
null_map_data,
|
|
null_map_item);
|
|
else
|
|
return false;
|
|
|
|
data.moveResult();
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Catches arguments of type LowCardinality(T) (left) and U (right).
|
|
*
|
|
* The perftests showed that the amount of action needed to convert the non-constant right argument to the index column
|
|
* (similar to the left one's) is significantly higher than converting the array itself to an ordinary column.
|
|
*
|
|
* So, in terms of performance it's more optimal to fall back to default implementation and catch only constant
|
|
* right arguments.
|
|
*
|
|
* Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 .
|
|
*/
|
|
static ColumnPtr executeLowCardinality(const ColumnsWithTypeAndName & arguments)
|
|
{
|
|
const ColumnArray * const col_array = checkAndGetColumn<ColumnArray>(arguments[0].column.get());
|
|
|
|
if (!col_array)
|
|
return nullptr;
|
|
|
|
const ColumnLowCardinality * const col_lc = checkAndGetColumn<ColumnLowCardinality>(&col_array->getData());
|
|
|
|
if (!col_lc)
|
|
return nullptr;
|
|
|
|
const auto [null_map_data, null_map_item] = getNullMaps(arguments);
|
|
|
|
if (const ColumnConst * col_arg_const = checkAndGetColumn<ColumnConst>(*arguments[1].column))
|
|
{
|
|
const IColumnUnique & col_lc_dict = col_lc->getDictionary();
|
|
|
|
const DataTypeArray * const array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
|
|
const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType());
|
|
|
|
ColumnPtr col_arg_cloned = castColumn(
|
|
{col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr);
|
|
|
|
ResultColumnPtr col_result = ResultColumnType::create();
|
|
UInt64 index = 0;
|
|
|
|
if (!col_arg_cloned->isNullAt(0))
|
|
{
|
|
if (col_arg_cloned->isNullable())
|
|
col_arg_cloned = checkAndGetColumn<ColumnNullable>(*col_arg_cloned)->getNestedColumnPtr();
|
|
|
|
StringRef elem = col_arg_cloned->getDataAt(0);
|
|
|
|
if (std::optional<UInt64> maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index)
|
|
{
|
|
index = *maybe_index;
|
|
}
|
|
else
|
|
{
|
|
const size_t offsets_size = col_array->getOffsets().size();
|
|
auto & data = col_result->getData();
|
|
|
|
data.resize_fill(offsets_size);
|
|
|
|
return col_result;
|
|
}
|
|
}
|
|
|
|
Impl::Main<ConcreteAction, true>::vector(
|
|
col_lc->getIndexes(),
|
|
col_array->getOffsets(),
|
|
index, /** Assuming LowCardinality has index of NULL always as zero. */
|
|
col_result->getData(),
|
|
null_map_data,
|
|
null_map_item);
|
|
|
|
return col_result;
|
|
}
|
|
else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U
|
|
{
|
|
const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T)
|
|
const ColumnNullable& left_nullable = *checkAndGetColumn<ColumnNullable>(left_casted.get());
|
|
|
|
const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData();
|
|
|
|
const IColumn & left_ptr = left_nullable.getNestedColumn();
|
|
|
|
const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality();
|
|
const ColumnNullable * const right_nullable = checkAndGetColumn<ColumnNullable>(right_casted.get());
|
|
|
|
const NullMap * const null_map_right_casted = right_nullable
|
|
? &right_nullable->getNullMapColumn().getData()
|
|
: null_map_item;
|
|
|
|
const IColumn& right_ptr = right_nullable
|
|
? right_nullable->getNestedColumn()
|
|
: *right_casted.get();
|
|
|
|
ExecutionData data =
|
|
{
|
|
left_ptr, right_ptr,
|
|
col_array->getOffsets(),
|
|
nullptr,
|
|
{null_map_left_casted, null_map_right_casted}};
|
|
|
|
if (dispatchConvertedLowCardinalityColumns(data))
|
|
return data.result_column;
|
|
}
|
|
else // LowCardinality(T) and U, T not Nullable
|
|
{
|
|
if (arguments[1].column->isNullable())
|
|
return nullptr;
|
|
|
|
if (const auto* const arg_lc = checkAndGetColumn<ColumnLowCardinality>(arguments[1].column.get());
|
|
arg_lc && arg_lc->isNullable())
|
|
return nullptr;
|
|
|
|
// LowCardinality(T) and U (possibly LowCardinality(V))
|
|
|
|
const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality();
|
|
const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality();
|
|
|
|
ExecutionData data =
|
|
{
|
|
*left_casted.get(), *right_casted.get(), col_array->getOffsets(),
|
|
nullptr, {null_map_data, null_map_item}
|
|
};
|
|
|
|
if (dispatchConvertedLowCardinalityColumns(data))
|
|
return data.result_column;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
/** Picks the implementation for columns that were converted from LowCardinality to full columns:
  * numeric on both sides -> the integral path, ColumnString on the left -> the string path,
  * anything else -> the generic IColumn comparison (which always succeeds or throws).
  */
static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data)
{
    const bool both_numeric = data.left.isNumeric() && data.right.isNumeric(); // ColumnArrays

    if (both_numeric)
        return executeIntegral<INTEGRAL_TPL_PACK>(data);

    const bool left_is_string = checkAndGetColumn<ColumnString>(&data.left) != nullptr;

    if (left_is_string)
        return executeStringImpl(data);

    Impl::Main<ConcreteAction, true>::vector(
        data.left,
        data.offsets,
        data.right,
        data.result->getData(),
        data.maps.first,
        data.maps.second);

    data.moveResult();
    return true;
}
|
|
|
|
#undef INTEGRAL_TPL_PACK
|
|
|
|
/// String path: applies when the first argument is a ColumnArray whose nested column is a ColumnString.
/// Returns nullptr otherwise, or when executeStringImpl() does not accept the item column.
static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments)
{
    const auto * array = checkAndGetColumn<ColumnArray>(arguments[0].column.get());

    if (!array)
        return nullptr;

    const auto * strings = checkAndGetColumn<ColumnString>(&array->getData());

    if (!strings)
        return nullptr;

    /// The holder keeps the converted item column alive while `data` refers to it.
    const ColumnPtr item_holder = arguments[1].column->convertToFullColumnIfLowCardinality();
    const IColumn & item = *item_holder.get();

    ExecutionData data{
        *strings,
        item,
        array->getOffsets(),
        nullptr,
        getNullMaps(arguments),
        ResultColumnType::create()};

    if (!executeStringImpl(data))
        return nullptr;

    return data.result_column;
}
|
|
|
|
/** Searches in an array of strings. data.left must be a ColumnString (the callers check this).
  * The item may be only-NULL, a constant String/FixedString, or a full ColumnString; for anything else
  * false is returned. On success the result is moved into data.result_column.
  */
static bool executeStringImpl(ExecutionData& data)
{
    const NullMap * const null_map_data = data.maps.first;
    const NullMap * const null_map_item = data.maps.second;

    const ColumnString& left = *typeid_cast<const ColumnString* const>(&data.left);

    if (data.right.onlyNull())
    {
        Impl::Null<ConcreteAction>::process(
            data.offsets,
            data.result->getData(),
            null_map_data);
    }
    else if (const auto * const const_item = checkAndGetColumnConstStringOrFixedString(&data.right))
    {
        const IColumn & const_nested = const_item->getDataColumn();

        if (const ColumnString * as_string = checkAndGetColumn<ColumnString>(&const_nested))
        {
            /// The length of the constant is taken from its first (only) row.
            Impl::String<ConcreteAction>::process(
                left.getChars(),
                data.offsets,
                left.getOffsets(),
                as_string->getChars(),
                as_string->getDataAt(0).size,
                data.result->getData(),
                null_map_data,
                null_map_item);
        }
        else if (const ColumnFixedString * as_fixed_string = checkAndGetColumn<ColumnFixedString>(&const_nested))
        {
            /// For FixedString the length is the fixed size N.
            Impl::String<ConcreteAction>::process(
                left.getChars(),
                data.offsets,
                left.getOffsets(),
                as_fixed_string->getChars(),
                as_fixed_string->getN(),
                data.result->getData(),
                null_map_data,
                null_map_item);
        }
        else
        {
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column");
        }
    }
    else if (const auto * const full_item = checkAndGetColumn<ColumnString>(&data.right))
    {
        Impl::String<ConcreteAction>::process(
            left.getChars(),
            data.offsets,
            left.getOffsets(),
            full_item->getChars(),
            full_item->getOffsets(),
            data.result->getData(),
            null_map_data,
            null_map_item);
    }
    else
    {
        return false;
    }

    data.moveResult();
    return true;
}
|
|
|
|
/** Handles a constant array as the first argument; returns nullptr for anything else.
  *
  * The array is taken as a vector of Fields and compared with FieldVisitorAccurateEquals.
  *  - Constant item: the answer is computed once and returned as a constant column.
  *  - Non-constant item: one answer per row. A NULL array element matches only rows where the item's
  *    null map (4th column of `arguments`, if present) is set.
  */
static ColumnPtr executeConst(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type)
{
    const ColumnConst * col_array = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());

    if (!col_array)
        return nullptr;

    Array arr = col_array->getValue<Array>();

    const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality();
    const IColumn * item_arg = right_ptr.get();

    if (isColumnConst(*item_arg))
    {
        ResultType current = 0;
        const auto & value = (*item_arg)[0];

        for (size_t i = 0, size = arr.size(); i < size; ++i)
        {
            if (!applyVisitor(FieldVisitorAccurateEquals(), arr[i], value))
                continue;

            ConcreteAction::apply(current, i);

            if constexpr (!ConcreteAction::resume_execution)
                break;
        }

        return result_type->createColumnConst(item_arg->size(), static_cast<ResultType>(current));
    }
    else
    {
        /// Null map of the 2nd function argument, if it applies.
        const NullMap * null_map = nullptr;

        /// The map lives at index 3, so at least four columns are required before it may be read.
        if (arguments.size() > 3)
            if (const auto & col = arguments[3].column; col)
                null_map = &assert_cast<const ColumnUInt8 &>(*col).getData();

        const size_t size = item_arg->size();
        auto col_res = ResultColumnType::create(size);

        auto & data = col_res->getData();

        for (size_t row = 0; row < size; ++row)
        {
            const auto & value = (*item_arg)[row];

            data[row] = 0;

            for (size_t i = 0, arr_size = arr.size(); i < arr_size; ++i)
            {
                if (arr[i].isNull())
                {
                    /// A NULL element can only match a NULL item.
                    if (!null_map)
                        continue;

                    if (!(*null_map)[row])
                        continue;
                }
                else if (!applyVisitor(FieldVisitorAccurateEquals(), arr[i], value))
                    continue;

                ConcreteAction::apply(data[row], i);

                if constexpr (!ConcreteAction::resume_execution)
                    break;
            }
        }

        return col_res;
    }
}
|
|
|
|
/** Fallback for element types without a specialised path. Both the array elements and the item are cast
  * to their least common supertype and then compared through the generic IColumn comparison.
  * Returns nullptr only when the first argument is not a ColumnArray.
  */
static ColumnPtr executeGeneric(const ColumnsWithTypeAndName & arguments)
{
    const auto * haystack = checkAndGetColumn<ColumnArray>(arguments[0].column.get());

    if (!haystack)
        return nullptr;

    const DataTypePtr element_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
    const DataTypePtr & item_type = arguments[1].type;

    const DataTypePtr common_type = getLeastSupertype(DataTypes{element_type, item_type});

    const ColumnPtr elements = castColumn({ haystack->getDataPtr(), element_type, "" }, common_type);

    const ColumnPtr full_item = arguments[1].column->convertToFullColumnIfLowCardinality();
    const ColumnPtr item = castColumn({ full_item, removeLowCardinality(item_type), "" }, common_type);

    auto col_res = ResultColumnType::create();

    const NullMaps maps = getNullMaps(arguments);
    const NullMap * const null_map_data = maps.first;
    const NullMap * const null_map_item = maps.second;

    if (item->onlyNull())
    {
        Impl::Null<ConcreteAction>::process(
            haystack->getOffsets(),
            col_res->getData(),
            null_map_data);
    }
    else if (isColumnConst(*item))
    {
        /// The constant is unwrapped to its data column; the item's null map is not passed here.
        Impl::Main<ConcreteAction, true>::vector(
            *elements,
            haystack->getOffsets(),
            typeid_cast<const ColumnConst &>(*item).getDataColumn(),
            col_res->getData(), /// TODO This is wrong.
            null_map_data,
            nullptr);
    }
    else
    {
        Impl::Main<ConcreteAction>::vector(
            *elements,
            haystack->getOffsets(),
            *item,
            col_res->getData(),
            null_map_data,
            null_map_item);
    }

    return col_res;
}
|
|
};
|
|
}
|