mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-22 01:30:51 +00:00
added functions arrayAny and arrayAll [#CLICKHOUSE-3504]
This commit is contained in:
parent
ee2ea696ba
commit
a76bb9660c
@ -62,6 +62,8 @@ generate_function_register(Array
|
||||
FunctionArrayPushFront
|
||||
FunctionArrayPopBack
|
||||
FunctionArrayPopFront
|
||||
FunctionArrayHasAll
|
||||
FunctionArrayHasAny
|
||||
)
|
||||
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <tuple>
|
||||
#include <array>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -2494,12 +2495,15 @@ String FunctionArrayConcat::getName() const
|
||||
/// Returns the least common type of all arguments.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when called without arguments and
/// ILLEGAL_TYPE_OF_ARGUMENT for the first argument that is not an array.
DataTypePtr FunctionArrayConcat::getReturnTypeImpl(const DataTypes & arguments) const
{
    /// Only the guarded throw is kept: a second, unguarded throw after this check
    /// would make the function fail for every call.
    if (arguments.empty())
        throw Exception{"Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

    /// Every argument (including the first one) is validated here, so no separate
    /// check of arguments[0] is needed.
    for (auto i : ext::range(0, arguments.size()))
    {
        auto array_type = typeid_cast<const DataTypeArray *>(arguments[i].get());
        if (!array_type)
            throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type "
                            + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    return getLeastCommonType(arguments);
}
|
||||
@ -2837,4 +2841,95 @@ FunctionPtr FunctionArrayPopBack::create(const Context &)
|
||||
return std::make_shared<FunctionArrayPopBack>();
|
||||
}
|
||||
|
||||
|
||||
/// Implementation of FunctionArrayHasAllAny.
|
||||
|
||||
/// Factory method: builds a FunctionArrayHasAll, passing the context on to its constructor.
FunctionPtr FunctionArrayHasAll::create(const Context & context)
{
    FunctionPtr function = std::make_shared<FunctionArrayHasAll>(context);
    return function;
}
|
||||
|
||||
/// Factory method: builds a FunctionArrayHasAny, passing the context on to its constructor.
FunctionPtr FunctionArrayHasAny::create(const Context & context)
{
    FunctionPtr function = std::make_shared<FunctionArrayHasAny>(context);
    return function;
}
|
||||
|
||||
|
||||
/// Checks that every argument is an array and reports UInt8 as the result type.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT for the first argument that is not an array.
DataTypePtr FunctionArrayHasAllAny::getReturnTypeImpl(const DataTypes & arguments) const
{
    for (size_t index = 0; index < arguments.size(); ++index)
    {
        const auto & argument = arguments[index];

        if (typeid_cast<const DataTypeArray *>(argument.get()))
            continue;

        throw Exception("Argument " + std::to_string(index) + " for function " + getName() + " must be an array but it has type "
                        + argument->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    return std::make_shared<DataTypeUInt8>();
}
|
||||
|
||||
/// Runs sliceHas over the first two argument columns (the `all` member selects the mode)
/// and stores the resulting UInt8 column at position `result` of the block.
void FunctionArrayHasAllAny::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
    const size_t num_rows = block.rows();
    const size_t arguments_count = arguments.size();

    auto has_column = ColumnUInt8::create(num_rows);

    /// The least common type of all arguments is computed only on first request and then reused.
    DataTypePtr cached_common_type;
    auto get_common_type = [&cached_common_type, &block, &arguments]() -> DataTypePtr
    {
        if (!cached_common_type)
        {
            DataTypes argument_types;
            argument_types.reserve(arguments.size());

            for (const auto & position : arguments)
                argument_types.push_back(block.getByPosition(position).type);

            cached_common_type = getLeastCommonType(argument_types);
        }

        return cached_common_type;
    };

    /// First pass: take the argument columns, casting the ones whose nested type is Nothing.
    Columns prepared_columns(arguments_count);

    for (size_t i = 0; i < arguments_count; ++i)
    {
        const auto & argument = block.getByPosition(arguments[i]);

        const auto * array_type = typeid_cast<const DataTypeArray *>(argument.type.get());
        const auto & element_type = array_type->getNestedType();

        /// Converts Array(Nothing) or Array(Nullable(Nothing)) to common type. Example: hasAll([Null, 1], [Null]) -> 1
        const bool nested_is_nothing = typeid_cast<const DataTypeNothing *>(removeNullable(element_type).get()) != nullptr;

        if (nested_is_nothing)
            prepared_columns[i] = castColumn(argument, get_common_type(), context);
        else
            prepared_columns[i] = argument.column;
    }

    /// Second pass: wrap every column into an array source.
    std::vector<std::unique_ptr<IArraySource>> array_sources;

    for (auto & column : prepared_columns)
    {
        bool is_const = false;

        /// A constant column is replaced in place by its data column; the source is told it is constant.
        if (auto const_column = typeid_cast<const ColumnConst *>(column.get()))
        {
            is_const = true;
            column = const_column->getDataColumnPtr();
        }

        auto array_column = typeid_cast<const ColumnArray *>(column.get());
        if (!array_column)
            throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};

        array_sources.emplace_back(createArraySource(*array_column, is_const, num_rows));
    }

    auto result_data = typeid_cast<ColumnUInt8 *>(has_column.get());
    sliceHas(*array_sources[0], *array_sources[1], all, *result_data);

    block.getByPosition(result).column = std::move(has_column);
}
|
||||
|
||||
}
|
||||
|
@ -1540,6 +1540,48 @@ public:
|
||||
FunctionArrayPopBack() : FunctionArrayPop(false, name) {}
|
||||
};
|
||||
|
||||
class FunctionArrayHasAllAny : public IFunction
|
||||
{
|
||||
public:
|
||||
FunctionArrayHasAllAny(const Context & context, bool all, const char * name)
|
||||
: context(context), all(all), name(name) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return false; }
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
private:
|
||||
const Context & context;
|
||||
bool all;
|
||||
const char * name;
|
||||
};
|
||||
|
||||
class FunctionArrayHasAll : public FunctionArrayHasAllAny
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "hasAll";
|
||||
|
||||
static FunctionPtr create(const Context & context);
|
||||
|
||||
FunctionArrayHasAll(const Context & context) : FunctionArrayHasAllAny(context, true, name) {}
|
||||
};
|
||||
|
||||
class FunctionArrayHasAny : public FunctionArrayHasAllAny
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "hasAny";
|
||||
|
||||
static FunctionPtr create(const Context & context);
|
||||
|
||||
FunctionArrayHasAny(const Context & context) : FunctionArrayHasAllAny(context, false, name) {}
|
||||
};
|
||||
|
||||
/// Name holder for the function "has".
struct NameHas
{
    static constexpr auto name = "has";
};
|
||||
/// Name holder for the function "indexOf".
struct NameIndexOf
{
    static constexpr auto name = "indexOf";
};
|
||||
|
@ -1153,4 +1153,135 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
|
||||
|
||||
template <bool all, typename FirstSliceType, typename SecondSliceType,
|
||||
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
|
||||
bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
|
||||
const UInt8 * first_null_map, const UInt8 * second_null_map)
|
||||
{
|
||||
const bool has_first_null_map = first_null_map != nullptr;
|
||||
const bool has_second_null_map = second_null_map != nullptr;
|
||||
|
||||
for (size_t i = 0; i < second.size; ++i)
|
||||
{
|
||||
bool has = false;
|
||||
for (size_t j = 0; j < first.size && !has; ++j)
|
||||
{
|
||||
const bool is_first_null = has_first_null_map && first_null_map[j];
|
||||
const bool is_second_null = has_second_null_map && second_null_map[i];
|
||||
|
||||
if (is_first_null && is_second_null)
|
||||
has = true;
|
||||
|
||||
if (!is_first_null && !is_second_null && isEqual(first, second, j, i))
|
||||
has = true;
|
||||
}
|
||||
|
||||
if (has && !all)
|
||||
return true;
|
||||
|
||||
if (!has && all)
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
return all;
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
|
||||
template <typename T, typename U>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second, size_t first_ind, size_t second_ind)
|
||||
{
|
||||
return first.data[first_ind] == second.data[second_ind];
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
template <typename T>
|
||||
bool sliceEqualElements(const NumericArraySlice<T> & first, const GenericArraySlice & second, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
bool sliceEqualElements(const GenericArraySlice & first, const NumericArraySlice<U> & second, size_t, size_t)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compares two elements of generic slices via compareAt on the underlying columns.
/// The in-slice indices are shifted by each slice's `begin` to address the column rows.
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
    const size_t first_row = first_ind + first.begin;
    const size_t second_row = second_ind + second.begin;
    return first.elements->compareAt(first_row, second_row, *second.elements, -1) == 0;
}
|
||||
|
||||
template <bool all, typename T, typename U>
|
||||
bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
|
||||
{
|
||||
auto impl = sliceHasImpl<all, NumericArraySlice<T>, NumericArraySlice<U>, sliceEqualElements<T, U>>;
|
||||
return impl(first, second, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <bool all>
|
||||
bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second)
|
||||
{
|
||||
/// Generic arrays should have the same type in order to use column.compareAt(...)
|
||||
if (typeid(*first.elements) != typeid(*second.elements))
|
||||
return false;
|
||||
|
||||
auto impl = sliceHasImpl<all, GenericArraySlice, GenericArraySlice, sliceEqualElements>;
|
||||
return impl(first, second, nullptr, nullptr);
|
||||
}
|
||||
|
||||
template <bool all, typename U>
|
||||
bool sliceHas(const GenericArraySlice & /*first*/, const NumericArraySlice<U> & /*second*/)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <bool all, typename T>
|
||||
bool sliceHas(const NumericArraySlice<T> & /*first*/, const GenericArraySlice & /*second*/)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
template <bool all, typename FirstArraySlice, typename SecondArraySlice>
|
||||
bool sliceHas(const FirstArraySlice & first, NullableArraySlice<SecondArraySlice> & second)
|
||||
{
|
||||
auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
|
||||
return impl(first, second, nullptr, second.null_map);
|
||||
}
|
||||
|
||||
template <bool all, typename FirstArraySlice, typename SecondArraySlice>
|
||||
bool sliceHas(const NullableArraySlice<FirstArraySlice> & first, SecondArraySlice & second)
|
||||
{
|
||||
auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
|
||||
return impl(first, second, first.null_map, nullptr);
|
||||
}
|
||||
|
||||
template <bool all, typename FirstArraySlice, typename SecondArraySlice>
|
||||
bool sliceHas(const NullableArraySlice<FirstArraySlice> & first, NullableArraySlice<SecondArraySlice> & second)
|
||||
{
|
||||
auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
|
||||
return impl(first, second, first.null_map, second.null_map);
|
||||
}
|
||||
|
||||
template <bool all, typename FirstSource, typename SecondSource>
|
||||
void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnUInt8 & result)
|
||||
{
|
||||
auto size = result.size();
|
||||
auto & data = result.getData();
|
||||
for (auto row : ext::range(0, size))
|
||||
{
|
||||
data[row] = static_cast<UInt8>(sliceHas<all>(first.getWhole(), second.getWhole()) ? 1 : 0);
|
||||
first.next();
|
||||
second.next();
|
||||
}
|
||||
}
|
||||
|
||||
void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result);
|
||||
|
||||
}
|
||||
|
25
dbms/src/Functions/GatherUtils_has.cpp
Normal file
25
dbms/src/Functions/GatherUtils_has.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
#include <Functions/GatherUtils.h>
|
||||
#include <Functions/GatherUtils_selectors.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Source-pair selector for sliceHas: receives a pair of sources and turns the
/// runtime `all` flag into the compile-time template argument of arrayAllAny.
struct ArrayHasSelectArraySourcePair : public ArraySourcePairSelector<ArrayHasSelectArraySourcePair>
{
    template <typename FirstSource, typename SecondSource>
    static void selectSourcePair(FirstSource && first, SecondSource && second, bool all, ColumnUInt8 & result)
    {
        if (!all)
        {
            arrayAllAny<false>(first, second, result);
            return;
        }

        arrayAllAny<true>(first, second, result);
    }
};
|
||||
|
||||
/// Entry point for abstract array sources: delegates to ArrayHasSelectArraySourcePair::select,
/// passing both sources, the `all` flag and the result column through unchanged.
void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result)
{
    using Selector = ArrayHasSelectArraySourcePair;
    Selector::select(first, second, all, result);
}
|
||||
|
||||
}
|
@ -109,4 +109,26 @@ struct ArraySinkSourceSelector
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Base>
|
||||
struct ArraySourcePairSelector
|
||||
{
|
||||
template <typename ... Args>
|
||||
static void select(IArraySource & first, IArraySource & second, Args && ... args)
|
||||
{
|
||||
GetArraySourceSelector<Base>::select(first, second, args ...);
|
||||
}
|
||||
|
||||
template <typename FirstSource, typename ... Args>
|
||||
static void selectImpl(FirstSource && first, IArraySource & second, Args && ... args)
|
||||
{
|
||||
GetArraySourceSelector<Base>::select(second, first, args ...);
|
||||
}
|
||||
|
||||
template <typename SecondSource, typename FirstSource, typename ... Args>
|
||||
static void selectImpl(SecondSource && second, FirstSource && first, Args && ... args)
|
||||
{
|
||||
Base::selectSourcePair(first, second, args ...);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user