mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
added arrayIntersect [#CLICKHOUSE-3504]
This commit is contained in:
parent
515cb7fbca
commit
694b305036
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <Common/HashTable/HashSet.h>
|
#include <Common/HashTable/HashSet.h>
|
||||||
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
|
||||||
|
|
||||||
/** A hash table that allows you to clear the table in O(1).
|
/** A hash table that allows you to clear the table in O(1).
|
||||||
|
@ -45,7 +45,7 @@ String getExceptionMessagePrefix(const DataTypes & types)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing)
|
DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing, bool force_support_conversion)
|
||||||
{
|
{
|
||||||
|
|
||||||
auto getNothingOrThrow = [throw_if_result_is_nothing, & types](const std::string & reason)
|
auto getNothingOrThrow = [throw_if_result_is_nothing, & types](const std::string & reason)
|
||||||
@ -123,7 +123,7 @@ DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_noth
|
|||||||
if (!all_arrays)
|
if (!all_arrays)
|
||||||
return getNothingOrThrow(" because some of them are Array and some of them are not");
|
return getNothingOrThrow(" because some of them are Array and some of them are not");
|
||||||
|
|
||||||
return std::make_shared<DataTypeArray>(getMostSubtype(nested_types, false));
|
return std::make_shared<DataTypeArray>(getMostSubtype(nested_types, false, force_support_conversion));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,7 +165,8 @@ DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_noth
|
|||||||
|
|
||||||
DataTypes common_tuple_types(tuple_size);
|
DataTypes common_tuple_types(tuple_size);
|
||||||
for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
|
for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
|
||||||
common_tuple_types[elem_idx] = getMostSubtype(nested_types[elem_idx], throw_if_result_is_nothing);
|
common_tuple_types[elem_idx] =
|
||||||
|
getMostSubtype(nested_types[elem_idx], throw_if_result_is_nothing, force_support_conversion);
|
||||||
|
|
||||||
return std::make_shared<DataTypeTuple>(common_tuple_types);
|
return std::make_shared<DataTypeTuple>(common_tuple_types);
|
||||||
}
|
}
|
||||||
@ -195,10 +196,10 @@ DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_noth
|
|||||||
|
|
||||||
if (have_nullable)
|
if (have_nullable)
|
||||||
{
|
{
|
||||||
if (all_nullable)
|
if (all_nullable || force_support_conversion)
|
||||||
return std::make_shared<DataTypeNullable>(getMostSubtype(nested_types, false));
|
return std::make_shared<DataTypeNullable>(getMostSubtype(nested_types, false, force_support_conversion));
|
||||||
|
|
||||||
return getMostSubtype(nested_types, throw_if_result_is_nothing);
|
return getMostSubtype(nested_types, throw_if_result_is_nothing, force_support_conversion);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,7 +10,10 @@ namespace DB
|
|||||||
* DataTypeNothing is the most common subtype for all types.
|
* DataTypeNothing is the most common subtype for all types.
|
||||||
* Examples: most common subtype for UInt16, UInt8 and Int8 - Unt16.
|
* Examples: most common subtype for UInt16, UInt8 and Int8 - Unt16.
|
||||||
* Examples: most common subtype for Array(UInt8), Int8 is Nothing
|
* Examples: most common subtype for Array(UInt8), Int8 is Nothing
|
||||||
|
*
|
||||||
|
* If force_support_conversion is true, returns a type to which each argument can be converted.
|
||||||
|
* Example: most common subtype for Array(UInt8) and Array(Nullable(Int32)) is Array(Nullable(UInt8)) if force_support_conversion is true.
|
||||||
*/
|
*/
|
||||||
DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing = false);
|
DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing = false, bool force_support_conversion = false);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -64,6 +64,7 @@ generate_function_register(Array
|
|||||||
FunctionArrayPopFront
|
FunctionArrayPopFront
|
||||||
FunctionArrayHasAll
|
FunctionArrayHasAll
|
||||||
FunctionArrayHasAny
|
FunctionArrayHasAny
|
||||||
|
FunctionArrayIntersect
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,7 +20,8 @@
|
|||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <DataTypes/DataTypeNothing.h>
|
#include <DataTypes/DataTypeNothing.h>
|
||||||
|
#include <DataTypes/getMostSubtype.h>
|
||||||
|
#include <Core/TypeListNumber.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -2932,4 +2933,374 @@ void FunctionArrayHasAllAny::executeImpl(Block & block, const ColumnNumbers & ar
|
|||||||
block.getByPosition(result).column = std::move(result_column);
|
block.getByPosition(result).column = std::move(result_column);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Implementation of FunctionArrayIntersect.
|
||||||
|
|
||||||
|
/// Factory used by the function registry: builds an arrayIntersect instance bound to the given context.
FunctionPtr FunctionArrayIntersect::create(const Context & context)
{
    FunctionPtr function = std::make_shared<FunctionArrayIntersect>(context);
    return function;
}
|
||||||
|
|
||||||
|
/// Returns the SQL-visible name of the function (the static `name` constant of the class).
String FunctionArrayIntersect::getName() const
{
    return String(FunctionArrayIntersect::name);
}
|
||||||
|
|
||||||
|
/// Deduces the result type: Array(T), where T is the most common subtype of the element types
/// of all arguments. Every argument must be an array. If at least one argument is Array(Nothing),
/// the result is Array(Nothing).
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when called without arguments and
/// ILLEGAL_TYPE_OF_ARGUMENT when some argument is not an array.
DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & arguments) const
{
    if (arguments.empty())
        throw Exception{"Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

    /// Element types of all arguments except those with Nothing elements.
    DataTypes element_types;
    element_types.reserve(arguments.size());

    bool saw_nothing_elements = false;

    for (size_t pos = 0; pos < arguments.size(); ++pos)
    {
        const auto * as_array = typeid_cast<const DataTypeArray *>(arguments[pos].get());
        if (as_array == nullptr)
            throw Exception("Argument " + std::to_string(pos) + " for function " + getName() + " must be an array but it has type "
                            + arguments[pos]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        const auto & element_type = as_array->getNestedType();

        if (typeid_cast<const DataTypeNothing *>(element_type.get()) != nullptr)
        {
            saw_nothing_elements = true;
            continue;
        }

        element_types.push_back(element_type);
    }

    DataTypePtr common_type;

    /// The common subtype is computed even if it is overridden below,
    /// so that incompatible element types are still reported.
    if (!element_types.empty())
        common_type = getMostSubtype(element_types, true);

    if (saw_nothing_elements)
        common_type = std::make_shared<DataTypeNothing>();

    return std::make_shared<DataTypeArray>(common_type);
}
|
||||||
|
|
||||||
|
/// Recursively rebuilds `column` so that its structure matches `data_type`:
/// a Nullable wrapper of the column is kept only where `data_type` is Nullable too, otherwise it is stripped.
/// Arrays and tuples are processed element-wise; any other column is returned as is.
/// Throws LOGICAL_ERROR if an array/tuple column is paired with a non-array/non-tuple type.
ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, const DataTypePtr & data_type) const
{
    if (const auto * nullable_column = checkAndGetColumn<ColumnNullable>(column.get()))
    {
        const auto & inner_column = nullable_column->getNestedColumnPtr();
        const auto * nullable_target = checkAndGetDataType<DataTypeNullable>(data_type.get());

        /// Target is not Nullable: drop the null map and continue with the nested column.
        if (!nullable_target)
            return castRemoveNullable(inner_column, data_type);

        auto converted_inner = castRemoveNullable(inner_column, nullable_target->getNestedType());
        return ColumnNullable::create(converted_inner, nullable_column->getNullMapColumnPtr());
    }

    if (const auto * array_column = checkAndGetColumn<ColumnArray>(column.get()))
    {
        const auto * array_target = checkAndGetDataType<DataTypeArray>(data_type.get());
        if (!array_target)
            throw Exception{"Cannot cast array column to column with type "
                            + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR};

        auto converted_data = castRemoveNullable(array_column->getDataPtr(), array_target->getNestedType());
        return ColumnArray::create(converted_data, array_column->getOffsetsPtr());
    }

    if (const auto * tuple_column = checkAndGetColumn<ColumnTuple>(column.get()))
    {
        const auto * tuple_target = checkAndGetDataType<DataTypeTuple>(data_type.get());
        if (!tuple_target)
            throw Exception{"Cannot cast tuple column to type "
                            + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR};

        const auto & element_types = tuple_target->getElements();
        const size_t elements_count = tuple_column->getColumns().size();

        Columns converted_elements(elements_count);
        for (size_t pos = 0; pos < elements_count; ++pos)
            converted_elements[pos] = castRemoveNullable(tuple_column->getColumnPtr(pos), element_types[pos]);

        return ColumnTuple::create(converted_elements);
    }

    /// Plain column: nothing to strip.
    return column;
}
|
||||||
|
|
||||||
|
/// Casts every argument column to a type suitable for intersection.
///
/// For numeric, date and string element types the arguments are cast to `return_type`
/// (or to its Array(Nullable(T)) counterpart when the argument has nullable elements and the result has not,
/// because Nullable(U) cannot be cast to T directly).
/// For all other element types the arguments are cast to `return_type_with_nulls`.
/// Columns whose type already matches are passed through without a cast.
///
/// Throws LOGICAL_ERROR if `return_type` (or an inspected argument type) is not an array.
Columns FunctionArrayIntersect::castColumns(
        Block & block, const ColumnNumbers & arguments, const DataTypePtr & return_type,
        const DataTypePtr & return_type_with_nulls) const
{
    size_t num_args = arguments.size();
    Columns columns(num_args);

    auto type_array = checkAndGetDataType<DataTypeArray>(return_type.get());
    /// Guard against dereferencing a null pointer below.
    if (!type_array)
        throw Exception{"Return type for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR};

    const auto & type_nested = type_array->getNestedType();
    auto type_not_nullable_nested = removeNullable(type_nested);

    const bool is_numeric_or_string = type_not_nullable_nested->isNumber()
                                      || type_not_nullable_nested->isDateOrDateTime()
                                      || type_not_nullable_nested->isStringOrFixedString();

    /// Array(Nullable(T)) variant of the result type; only needed (and only set) for simple element types.
    DataTypePtr nullable_return_type;

    if (is_numeric_or_string)
    {
        auto type_nullable_nested = makeNullable(type_nested);
        nullable_return_type = std::make_shared<DataTypeArray>(type_nullable_nested);
    }

    const bool nested_is_nullable = type_nested->isNullable();

    for (size_t i = 0; i < num_args; ++i)
    {
        const ColumnWithTypeAndName & arg = block.getByPosition(arguments[i]);
        auto & column = columns[i];

        if (is_numeric_or_string)
        {
            /// Cast to Array(T) or Array(Nullable(T)).
            if (nested_is_nullable)
            {
                if (arg.type->equals(*return_type))
                    column = arg.column;
                else
                    column = castColumn(arg, return_type, context);
            }
            else
            {
                /// If result has array type Array(T) still cast Array(Nullable(U)) to Array(Nullable(T))
                ///  because cannot cast Nullable(T) to T.
                if (arg.type->equals(*return_type) || arg.type->equals(*nullable_return_type))
                {
                    column = arg.column;
                }
                else
                {
                    auto arg_type_array = checkAndGetDataType<DataTypeArray>(arg.type.get());
                    if (!arg_type_array)
                        throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};

                    if (arg_type_array->getNestedType()->isNullable())
                        column = castColumn(arg, nullable_return_type, context);
                    else
                        column = castColumn(arg, return_type, context);
                }
            }
        }
        else
        {
            /// return_type_with_nulls is the most common subtype with possible nullable parts.
            if (arg.type->equals(*return_type_with_nulls))
                column = arg.column;
            else
                column = castColumn(arg, return_type_with_nulls, context);
        }
    }

    return columns;
}
|
||||||
|
|
||||||
|
/// Splits every argument column into raw parts used by `execute`:
/// constness flag, offsets, nested data column and (if the elements are Nullable) the null map.
/// A ColumnConst wrapper is unwrapped to its data column. The returned pointers refer into `columns`.
/// Throws LOGICAL_ERROR if some (unwrapped) column is not a ColumnArray.
FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays(const Columns & columns) const
{
    const size_t count = columns.size();

    UnpackedArrays unpacked;
    unpacked.nested_columns.resize(count);
    unpacked.offsets.resize(count);
    unpacked.null_maps.resize(count);
    unpacked.is_const.assign(count, false);

    for (size_t pos = 0; pos < count; ++pos)
    {
        auto raw_column = columns[pos].get();

        if (auto as_const = typeid_cast<const ColumnConst *>(raw_column))
        {
            unpacked.is_const[pos] = true;
            raw_column = as_const->getDataColumnPtr().get();
        }

        auto as_array = typeid_cast<const ColumnArray *>(raw_column);
        if (!as_array)
            throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};

        unpacked.offsets[pos] = &as_array->getOffsets();
        unpacked.nested_columns[pos] = &as_array->getData();

        /// For Nullable elements expose the null map separately and point to the underlying data.
        if (auto as_nullable = typeid_cast<const ColumnNullable *>(unpacked.nested_columns[pos]))
        {
            unpacked.null_maps[pos] = &as_nullable->getNullMapData();
            unpacked.nested_columns[pos] = &as_nullable->getNestedColumn();
        }
    }

    return unpacked;
}
|
||||||
|
|
||||||
|
/// Computes the intersection of the argument arrays and stores it in the `result` position of `block`.
///
/// Steps:
///  1. If the result type is Array(Nothing), the result is a constant column with the default value.
///  2. Arguments are cast to a common type (see castColumns) and unpacked (see prepareArrays).
///  3. A specialised `execute` is chosen by the non-nullable element type: numbers, Date, DateTime,
///     String, FixedString; everything else goes through the generic serialisation-based path.
///
/// Throws LOGICAL_ERROR if the result type is not an array.
void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
    const auto & return_type = block.getByPosition(result).type;
    auto return_type_array = checkAndGetDataType<DataTypeArray>(return_type.get());

    /// Check the result of the cast (not the original pointer): it is dereferenced right below.
    if (!return_type_array)
        throw Exception{"Return type for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR};

    const auto & nested_return_type = return_type_array->getNestedType();

    if (typeid_cast<const DataTypeNothing *>(nested_return_type.get()))
    {
        block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(block.rows());
        return;
    }

    auto num_args = arguments.size();
    DataTypes data_types;
    data_types.reserve(num_args);
    for (size_t i = 0; i < num_args; ++i)
        data_types.push_back(block.getByPosition(arguments[i]).type);

    /// Common type that every argument can be converted to (may contain Nullable parts).
    auto return_type_with_nulls = getMostSubtype(data_types, true, true);

    Columns columns = castColumns(block, arguments, return_type, return_type_with_nulls);

    UnpackedArrays arrays = prepareArrays(columns);

    ColumnPtr result_column;
    auto not_nullable_nested_return_type = removeNullable(nested_return_type);

    /// Try all numeric element types; result_column stays empty if none of them matches.
    TypeListNumbers::forEach(NumberExecutor(arrays, not_nullable_nested_return_type, result_column));

    using DateMap = ClearableHashMap<DataTypeDate::FieldType, size_t, DefaultHash<DataTypeDate::FieldType>,
            HashTableGrower<INITIAL_SIZE_DEGREE>,
            HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDate::FieldType)>>;

    using DateTimeMap = ClearableHashMap<DataTypeDateTime::FieldType, size_t, DefaultHash<DataTypeDateTime::FieldType>,
            HashTableGrower<INITIAL_SIZE_DEGREE>,
            HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDateTime::FieldType)>>;

    using StringMap = ClearableHashMap<StringRef, size_t, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
            HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;

    if (!result_column)
    {
        auto column = not_nullable_nested_return_type->createColumn();

        if (checkDataType<DataTypeDate>(not_nullable_nested_return_type.get()))
            result_column = execute<DateMap, ColumnVector<DataTypeDate::FieldType>, true>(arrays, std::move(column));
        else if (checkDataType<DataTypeDateTime>(not_nullable_nested_return_type.get()))
            result_column = execute<DateTimeMap, ColumnVector<DataTypeDateTime::FieldType>, true>(arrays, std::move(column));
        else if (not_nullable_nested_return_type->isString())
            result_column = execute<StringMap, ColumnString, false>(arrays, std::move(column));
        else if (not_nullable_nested_return_type->isFixedString())
            result_column = execute<StringMap, ColumnFixedString, false>(arrays, std::move(column));
        else
        {
            /// Generic path: values are compared by their serialised form, so the arguments were cast
            /// to the type with nulls; extra Nullable wrappers are removed from the result afterwards.
            column = static_cast<const DataTypeArray &>(*return_type_with_nulls).getNestedType()->createColumn();
            result_column = castRemoveNullable(execute<StringMap, IColumn, false>(arrays, std::move(column)), return_type);
        }
    }

    block.getByPosition(result).column = std::move(result_column);
}
|
||||||
|
|
||||||
|
template <typename T, size_t>
|
||||||
|
void FunctionArrayIntersect::NumberExecutor::operator()()
|
||||||
|
{
|
||||||
|
using Map = ClearableHashMap<T, size_t, DefaultHash<T>, HashTableGrower<INITIAL_SIZE_DEGREE>,
|
||||||
|
HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>;
|
||||||
|
|
||||||
|
if (!result && typeid_cast<const DataTypeNumber<T> *>(data_type.get()))
|
||||||
|
result = execute<Map, ColumnVector<T>, true>(arrays, ColumnVector<T>::create());
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Map, typename ColumnType, bool is_numeric_column>
|
||||||
|
ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, MutableColumnPtr result_data_ptr)
|
||||||
|
{
|
||||||
|
auto args = arrays.nested_columns.size();
|
||||||
|
auto rows = arrays.offsets.front()->size();
|
||||||
|
|
||||||
|
bool all_nullable = true;
|
||||||
|
|
||||||
|
std::vector<const ColumnType *> columns;
|
||||||
|
columns.reserve(args);
|
||||||
|
for (auto arg : ext::range(0, args))
|
||||||
|
{
|
||||||
|
if constexpr (std::is_same<ColumnType, IColumn>::value)
|
||||||
|
columns.push_back(arrays.nested_columns[arg]);
|
||||||
|
else
|
||||||
|
columns.push_back(checkAndGetColumn<ColumnType>(arrays.nested_columns[arg]));
|
||||||
|
|
||||||
|
if (!columns.back())
|
||||||
|
throw Exception("Unexpected array type for function arrayIntersect", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
|
if (!arrays.null_maps[arg])
|
||||||
|
all_nullable = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto & result_data = static_cast<ColumnType &>(*result_data_ptr);
|
||||||
|
auto result_offsets_ptr = ColumnArray::ColumnOffsets::create(rows);
|
||||||
|
auto & result_offsets = static_cast<ColumnArray::ColumnOffsets &>(*result_offsets_ptr);
|
||||||
|
auto null_map_column = ColumnUInt8::create();
|
||||||
|
NullMap & null_map = static_cast<ColumnUInt8 &>(*null_map_column).getData();
|
||||||
|
|
||||||
|
Arena arena;
|
||||||
|
|
||||||
|
Map map;
|
||||||
|
std::vector<size_t> prev_off(args, 0);
|
||||||
|
size_t result_offset = 0;
|
||||||
|
for (auto row : ext::range(0, rows))
|
||||||
|
{
|
||||||
|
map.clear();
|
||||||
|
|
||||||
|
bool all_has_nullable = all_nullable;
|
||||||
|
|
||||||
|
for (auto arg : ext::range(0, args))
|
||||||
|
{
|
||||||
|
bool current_has_nullable = false;
|
||||||
|
size_t off = (*arrays.offsets[arg])[row];
|
||||||
|
for (auto i : ext::range(prev_off[arg], off))
|
||||||
|
{
|
||||||
|
if (arrays.null_maps[arg] && (*arrays.null_maps[arg])[i])
|
||||||
|
current_has_nullable = true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if constexpr (is_numeric_column)
|
||||||
|
++map[columns[arg]->getElement(i)];
|
||||||
|
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
|
||||||
|
++map[columns[arg]->getDataAt(i)];
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const char * data = nullptr;
|
||||||
|
++map[columns[arg]->serializeValueIntoArena(i, arena, data)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prev_off[arg] = off;
|
||||||
|
if (!current_has_nullable)
|
||||||
|
all_has_nullable = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (all_has_nullable)
|
||||||
|
{
|
||||||
|
++result_offset;
|
||||||
|
result_data.insertDefault();
|
||||||
|
null_map.push_back(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto & pair : map)
|
||||||
|
{
|
||||||
|
if (pair.second == args)
|
||||||
|
{
|
||||||
|
++result_offset;
|
||||||
|
if constexpr (is_numeric_column)
|
||||||
|
result_data.insert(pair.first);
|
||||||
|
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
|
||||||
|
result_data.insertData(pair.first.data, pair.first.size);
|
||||||
|
else
|
||||||
|
result_data.deserializeAndInsertFromArena(pair.first.data);
|
||||||
|
|
||||||
|
if (all_nullable)
|
||||||
|
null_map.push_back(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result_offsets.getElement(row) = result_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
ColumnPtr result_column = std::move(result_data_ptr);
|
||||||
|
if (all_nullable)
|
||||||
|
result_column = ColumnNullable::create(result_column, std::move(null_map_column));
|
||||||
|
return ColumnArray::create(result_column, std::move(result_offsets_ptr));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1540,6 +1540,64 @@ public:
|
|||||||
FunctionArrayPopBack() : FunctionArrayPop(false, name) {}
|
FunctionArrayPopBack() : FunctionArrayPop(false, name) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class FunctionArrayIntersect : public IFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static constexpr auto name = "arrayIntersect";
|
||||||
|
static FunctionPtr create(const Context & context);
|
||||||
|
FunctionArrayIntersect(const Context & context) : context(context) {};
|
||||||
|
|
||||||
|
String getName() const override;
|
||||||
|
|
||||||
|
bool isVariadic() const override { return true; }
|
||||||
|
size_t getNumberOfArguments() const override { return 0; }
|
||||||
|
|
||||||
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
|
||||||
|
|
||||||
|
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;
|
||||||
|
|
||||||
|
bool useDefaultImplementationForConstants() const override { return true; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Context & context;
|
||||||
|
|
||||||
|
/// Initially allocate a piece of memory for 512 elements. NOTE: This is just a guess.
|
||||||
|
static constexpr size_t INITIAL_SIZE_DEGREE = 9;
|
||||||
|
|
||||||
|
struct UnpackedArrays
|
||||||
|
{
|
||||||
|
std::vector<char> is_const;
|
||||||
|
std::vector<const NullMap *> null_maps;
|
||||||
|
std::vector<const ColumnArray::ColumnOffsets::Container *> offsets;
|
||||||
|
ColumnRawPtrs nested_columns;
|
||||||
|
|
||||||
|
UnpackedArrays() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Cast column to data_type removing nullable if data_type hasn't.
|
||||||
|
/// It's expected that column can represent data_type after removing some NullMap's.
|
||||||
|
ColumnPtr castRemoveNullable(const ColumnPtr & column, const DataTypePtr & data_type) const;
|
||||||
|
Columns castColumns(Block & block, const ColumnNumbers & arguments,
|
||||||
|
const DataTypePtr & return_type, const DataTypePtr & return_type_with_nulls) const;
|
||||||
|
UnpackedArrays prepareArrays(const Columns & columns) const;
|
||||||
|
|
||||||
|
template <typename Map, typename ColumnType, bool is_numeric_column>
|
||||||
|
static ColumnPtr execute(const UnpackedArrays & arrays, MutableColumnPtr result_data);
|
||||||
|
|
||||||
|
struct NumberExecutor
|
||||||
|
{
|
||||||
|
const UnpackedArrays & arrays;
|
||||||
|
const DataTypePtr & data_type;
|
||||||
|
ColumnPtr & result;
|
||||||
|
|
||||||
|
NumberExecutor(const UnpackedArrays & arrays, const DataTypePtr & data_type, ColumnPtr & result)
|
||||||
|
: arrays(arrays), data_type(data_type), result(result) {}
|
||||||
|
|
||||||
|
template <typename T, size_t>
|
||||||
|
void operator()();
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
class FunctionArrayHasAllAny : public IFunction
|
class FunctionArrayHasAllAny : public IFunction
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -0,0 +1,14 @@
|
|||||||
|
[]
|
||||||
|
[]
|
||||||
|
[1]
|
||||||
|
[]
|
||||||
|
[1]
|
||||||
|
[1]
|
||||||
|
[1]
|
||||||
|
[NULL,1]
|
||||||
|
[1]
|
||||||
|
[1]
|
||||||
|
[[1,1]]
|
||||||
|
[[1,1]]
|
||||||
|
[(1,['a','b'])]
|
||||||
|
Array(Tuple(UInt8, Array(String)))
|
15
dbms/tests/queries/0_stateless/00556_array_intersect.sql
Normal file
15
dbms/tests/queries/0_stateless/00556_array_intersect.sql
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
select arrayIntersect([], []);
|
||||||
|
select arrayIntersect([1], []);
|
||||||
|
select arrayIntersect([1], [1]);
|
||||||
|
select arrayIntersect([1, 2], [1, 3], [2, 3]);
|
||||||
|
select arrayIntersect([1, 2], [1, 3], [1, 4]);
|
||||||
|
select arrayIntersect([1, -1], [1]);
|
||||||
|
select arrayIntersect([1, -1], [Null, 1]);
|
||||||
|
select arrayIntersect([1, -1, Null], [Null, 1]);
|
||||||
|
select arrayIntersect(cast([1, 2] as Array(Nullable(Int8))), [1, 3]);
|
||||||
|
select arrayIntersect(CAST([1, -1] AS Array(Nullable(Int8))), [NULL, 1]);
|
||||||
|
select arrayIntersect([[1, 2], [1, 1]], [[2, 1], [1, 1]]);
|
||||||
|
select arrayIntersect([[1, 2, Null], [1, 1]], [[-2, 1], [1, 1]]);
|
||||||
|
select arrayIntersect([(1, ['a', 'b']), (Null, ['c'])], [(2, ['c', Null]), (1, ['a', 'b'])]);
|
||||||
|
select toTypeName(arrayIntersect([(1, ['a', 'b']), (Null, ['c'])], [(2, ['c', Null]), (1, ['a', 'b'])]));
|
||||||
|
|
Loading…
Reference in New Issue
Block a user