added functions arrayAny and arrayAll [#CLICKHOUSE-3504]

This commit is contained in:
Nikolai Kochetov 2018-01-09 18:24:27 +03:00
parent ee2ea696ba
commit a76bb9660c
6 changed files with 322 additions and 5 deletions

View File

@ -62,6 +62,8 @@ generate_function_register(Array
FunctionArrayPushFront
FunctionArrayPopBack
FunctionArrayPopFront
FunctionArrayHasAll
FunctionArrayHasAny
)

View File

@ -19,6 +19,7 @@
#include <Interpreters/castColumn.h>
#include <tuple>
#include <array>
#include <DataTypes/DataTypeNothing.h>
namespace DB
@ -2494,12 +2495,15 @@ String FunctionArrayConcat::getName() const
/// Validates the arguments and returns their least common type.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when called without arguments and
/// ILLEGAL_TYPE_OF_ARGUMENT when any argument is not an array.
DataTypePtr FunctionArrayConcat::getReturnTypeImpl(const DataTypes & arguments) const
{
    if (arguments.empty())
        throw Exception{"Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

    /// Every argument (not only the first one) has to be an array.
    /// The position reported in the message is zero-based.
    for (auto i : ext::range(0, arguments.size()))
    {
        auto array_type = typeid_cast<const DataTypeArray *>(arguments[i].get());
        if (!array_type)
            throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type "
                            + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    return getLeastCommonType(arguments);
}
@ -2837,4 +2841,95 @@ FunctionPtr FunctionArrayPopBack::create(const Context &)
return std::make_shared<FunctionArrayPopBack>();
}
/// Implementation of FunctionArrayHasAllAny.
/// Factory: constructs a FunctionArrayHasAll from the given context and returns it as FunctionPtr.
FunctionPtr FunctionArrayHasAll::create(const Context & context)
{
return std::make_shared<FunctionArrayHasAll>(context);
}
/// Factory: constructs a FunctionArrayHasAny from the given context and returns it as FunctionPtr.
FunctionPtr FunctionArrayHasAny::create(const Context & context)
{
return std::make_shared<FunctionArrayHasAny>(context);
}
/// Checks that every argument is an array and reports UInt8 as the result type.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT naming the zero-based position of the first non-array argument.
DataTypePtr FunctionArrayHasAllAny::getReturnTypeImpl(const DataTypes & arguments) const
{
    size_t position = 0;
    for (const auto & argument : arguments)
    {
        const bool is_array = typeid_cast<const DataTypeArray *>(argument.get()) != nullptr;
        if (!is_array)
            throw Exception("Argument " + std::to_string(position) + " for function " + getName() + " must be an array but it has type "
                            + argument->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        ++position;
    }

    return std::make_shared<DataTypeUInt8>();
}
/// Fills the UInt8 result column: for every row, stores whether the first array argument
/// contains all (when `all` is set) or any (otherwise) of the elements of the second one.
/// Throws LOGICAL_ERROR if an argument turns out not to be an array type or array column.
void FunctionArrayHasAllAny::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
    size_t rows = block.rows();
    size_t num_args = arguments.size();

    auto result_column = ColumnUInt8::create(rows);

    /// Least common type of all arguments. Computed lazily, at most once,
    /// because it is only needed when some argument has to be cast.
    DataTypePtr common_type = nullptr;
    auto commonType = [& common_type, & block, & arguments]() -> const DataTypePtr &
    {
        if (common_type == nullptr)
        {
            DataTypes data_types;
            data_types.reserve(arguments.size());
            for (const auto & argument : arguments)
                data_types.push_back(block.getByPosition(argument).type);

            common_type = getLeastCommonType(data_types);
        }

        return common_type;
    };

    Columns preprocessed_columns(num_args);

    for (size_t i = 0; i < num_args; ++i)
    {
        const auto & argument = block.getByPosition(arguments[i]);
        ColumnPtr preprocessed_column = argument.column;

        const auto argument_type = typeid_cast<const DataTypeArray *>(argument.type.get());
        /// getReturnTypeImpl already rejects non-array arguments; guard the dereference below anyway.
        if (!argument_type)
            throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};

        const auto & nested_type = argument_type->getNestedType();

        /// Converts Array(Nothing) or Array(Nullable(Nothing)) to common type. Example: hasAll([Null, 1], [Null]) -> 1
        if (typeid_cast<const DataTypeNothing *>(removeNullable(nested_type).get()))
            preprocessed_column = castColumn(argument, commonType(), context);

        preprocessed_columns[i] = std::move(preprocessed_column);
    }

    std::vector<std::unique_ptr<IArraySource>> sources;

    for (auto & argument_column : preprocessed_columns)
    {
        /// Unwrap constant columns; constness is passed on to the array source.
        bool is_const = false;

        if (auto argument_column_const = typeid_cast<const ColumnConst *>(argument_column.get()))
        {
            is_const = true;
            argument_column = argument_column_const->getDataColumnPtr();
        }

        if (auto argument_column_array = typeid_cast<const ColumnArray *>(argument_column.get()))
            sources.emplace_back(createArraySource(*argument_column_array, is_const, rows));
        else
            throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};
    }

    /// The function takes exactly two arguments (see getNumberOfArguments), hence sources[0] and sources[1].
    auto result_column_ptr = typeid_cast<ColumnUInt8 *>(result_column.get());
    sliceHas(*sources[0], *sources[1], all, *result_column_ptr);

    block.getByPosition(result).column = std::move(result_column);
}
}

View File

@ -1540,6 +1540,48 @@ public:
FunctionArrayPopBack() : FunctionArrayPop(false, name) {}
};
/// Shared implementation of the two-argument functions hasAll and hasAny.
/// The `all` flag chosen by the derived class selects the mode, `name` is what getName() reports.
/// Holds a reference to the Context passed to the constructor.
class FunctionArrayHasAllAny : public IFunction
{
public:
    FunctionArrayHasAllAny(const Context & context, bool all, const char * name)
        : context(context), all(all), name(name) {}

    String getName() const override { return name; }

    /// Exactly two arguments, not variadic.
    size_t getNumberOfArguments() const override { return 2; }
    bool isVariadic() const override { return false; }

    bool useDefaultImplementationForConstants() const override { return true; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override;

private:
    const Context & context;
    bool all;
    const char * name;
};
/// The "hasAll" function: FunctionArrayHasAllAny with the `all` flag set to true.
class FunctionArrayHasAll : public FunctionArrayHasAllAny
{
public:
    static constexpr auto name = "hasAll";

    static FunctionPtr create(const Context & context);

    /// explicit: a Context must not convert implicitly into a function object.
    explicit FunctionArrayHasAll(const Context & context) : FunctionArrayHasAllAny(context, true, name) {}
};
/// The "hasAny" function: FunctionArrayHasAllAny with the `all` flag set to false.
class FunctionArrayHasAny : public FunctionArrayHasAllAny
{
public:
    static constexpr auto name = "hasAny";

    static FunctionPtr create(const Context & context);

    /// explicit: a Context must not convert implicitly into a function object.
    explicit FunctionArrayHasAny(const Context & context) : FunctionArrayHasAllAny(context, false, name) {}
};
/// Name tags: each struct only carries a compile-time string constant `name`.
struct NameHas { static constexpr auto name = "has"; };
struct NameIndexOf { static constexpr auto name = "indexOf"; };

View File

@ -1153,4 +1153,135 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con
}
}
/// Core search used by the sliceHas overloads: checks whether `first` contains elements of `second`.
///  all == true  - every element of `second` must be found in `first` (an empty `second` gives true);
///  all == false - at least one element of `second` must be found in `first` (an empty `second` gives false).
/// A null map may be nullptr, meaning that the slice has no NULLs.
/// A NULL matches only another NULL; `isEqual` is called only for two non-NULL elements.
template <bool all, typename FirstSliceType, typename SecondSliceType,
          bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
                  const UInt8 * first_null_map, const UInt8 * second_null_map)
{
    for (size_t second_pos = 0; second_pos < second.size; ++second_pos)
    {
        /// Does not depend on the position in `first`, so it is evaluated once per searched element.
        const bool needle_is_null = second_null_map != nullptr && second_null_map[second_pos];

        bool found = false;
        for (size_t first_pos = 0; first_pos < first.size; ++first_pos)
        {
            const bool candidate_is_null = first_null_map != nullptr && first_null_map[first_pos];

            /// NULL never matches a non-NULL value.
            if (candidate_is_null != needle_is_null)
                continue;

            if (candidate_is_null || isEqual(first, second, first_pos, second_pos))
            {
                found = true;
                break;
            }
        }

        /// "any" stops at the first hit, "all" stops at the first miss.
        if (found != all)
            return found;
    }

    return all;
}
/// Element equality for two numeric slices: compares first.data[first_ind] with second.data[second_ind].
/// T and U are independent template parameters compared directly with `==`;
/// -Wsign-compare is silenced around this overload (presumably because T and U may differ in signedness — confirm).
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-compare"
template <typename T, typename U>
bool sliceEqualElements(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second, size_t first_ind, size_t second_ind)
{
return first.data[first_ind] == second.data[second_ind];
}
#pragma GCC diagnostic pop
/// An element of a numeric slice is never reported equal to an element of a generic slice.
/// Parameter names are commented out because the arguments are intentionally unused.
template <typename T>
bool sliceEqualElements(const NumericArraySlice<T> & /*first*/, const GenericArraySlice & /*second*/, size_t, size_t)
{
    return false;
}

/// Mirror case: an element of a generic slice is never reported equal to an element of a numeric slice.
template <typename U>
bool sliceEqualElements(const GenericArraySlice & /*first*/, const NumericArraySlice<U> & /*second*/, size_t, size_t)
{
    return false;
}
/// Element equality for two generic slices, delegated to compareAt() of the first slice's column.
/// Indices are relative to the slices, so each slice's `begin` offset is added first.
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
    const auto first_row = first_ind + first.begin;
    const auto second_row = second_ind + second.begin;
    return 0 == first.elements->compareAt(first_row, second_row, *second.elements, -1);
}
template <bool all, typename T, typename U>
bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
{
auto impl = sliceHasImpl<all, NumericArraySlice<T>, NumericArraySlice<U>, sliceEqualElements<T, U>>;
return impl(first, second, nullptr, nullptr);
}
template <bool all>
bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second)
{
/// Generic arrays should have the same type in order to use column.compareAt(...)
if (typeid(*first.elements) != typeid(*second.elements))
return false;
auto impl = sliceHasImpl<all, GenericArraySlice, GenericArraySlice, sliceEqualElements>;
return impl(first, second, nullptr, nullptr);
}
/// Mixed generic/numeric pair: always returns false, for both values of `all`, without looking at the slices.
/// NOTE(review): this also holds for all == true with an empty second slice — confirm that is intended.
template <bool all, typename U>
bool sliceHas(const GenericArraySlice & /*first*/, const NumericArraySlice<U> & /*second*/)
{
return false;
}
/// Mixed numeric/generic pair: always returns false, for both values of `all`, without looking at the slices.
template <bool all, typename T>
bool sliceHas(const NumericArraySlice<T> & /*first*/, const GenericArraySlice & /*second*/)
{
return false;
}
/// sliceHas where only the second slice is nullable: passes nullptr as the first null map and second.null_map as the second.
/// NOTE(review): `second` is a non-const lvalue reference, while arrayAllAny passes second.getWhole() directly;
/// if getWhole() returns by value, this overload cannot bind to it — confirm which overload is actually selected.
/// NOTE(review): sliceEqualElements<FirstArraySlice, SecondArraySlice> passes slice types as template arguments,
/// whereas the numeric overload of sliceEqualElements is parameterised by element types — verify this instantiates.
template <bool all, typename FirstArraySlice, typename SecondArraySlice>
bool sliceHas(const FirstArraySlice & first, NullableArraySlice<SecondArraySlice> & second)
{
auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
return impl(first, second, nullptr, second.null_map);
}
/// sliceHas where only the first slice is nullable: passes first.null_map as the first null map and nullptr as the second.
/// NOTE(review): `second` is a non-const lvalue reference, while arrayAllAny passes second.getWhole() directly;
/// if getWhole() returns by value, this overload cannot bind to it — confirm which overload is actually selected.
/// NOTE(review): sliceEqualElements<FirstArraySlice, SecondArraySlice> passes slice types as template arguments,
/// whereas the numeric overload of sliceEqualElements is parameterised by element types — verify this instantiates.
template <bool all, typename FirstArraySlice, typename SecondArraySlice>
bool sliceHas(const NullableArraySlice<FirstArraySlice> & first, SecondArraySlice & second)
{
auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
return impl(first, second, first.null_map, nullptr);
}
/// sliceHas where both slices are nullable: passes both null maps to sliceHasImpl.
/// NOTE(review): `second` is a non-const lvalue reference, while arrayAllAny passes second.getWhole() directly;
/// if getWhole() returns by value, this overload cannot bind to it — confirm which overload is actually selected.
/// NOTE(review): sliceEqualElements<FirstArraySlice, SecondArraySlice> passes slice types as template arguments,
/// whereas the numeric overload of sliceEqualElements is parameterised by element types — verify this instantiates.
template <bool all, typename FirstArraySlice, typename SecondArraySlice>
bool sliceHas(const NullableArraySlice<FirstArraySlice> & first, NullableArraySlice<SecondArraySlice> & second)
{
auto impl = sliceHasImpl<all, FirstArraySlice, SecondArraySlice, sliceEqualElements<FirstArraySlice, SecondArraySlice>>;
return impl(first, second, first.null_map, second.null_map);
}
/// Row-wise driver: for each row of `result`, applies sliceHas<all> to the current whole arrays
/// of both sources, stores 1 or 0, then advances both sources to the next row.
/// The number of processed rows is taken from the size of `result`.
template <bool all, typename FirstSource, typename SecondSource>
void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnUInt8 & result)
{
    auto & flags = result.getData();
    const auto rows = result.size();

    for (size_t row = 0; row < rows; ++row)
    {
        const bool contains = sliceHas<all>(first.getWhole(), second.getWhole());
        flags[row] = static_cast<UInt8>(contains ? 1 : 0);

        first.next();
        second.next();
    }
}
/// Entry point taking the sources through the IArraySource interface: receives two array sources,
/// the `all` flag and the UInt8 column to fill. Only declared here.
void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result);
}

View File

@ -0,0 +1,25 @@
#include <Functions/GatherUtils.h>
#include <Functions/GatherUtils_selectors.h>
namespace DB
{
/// Selector for a pair of array sources: once both concrete source types are known,
/// turns the runtime `all` flag into the template argument of arrayAllAny.
struct ArrayHasSelectArraySourcePair : public ArraySourcePairSelector<ArrayHasSelectArraySourcePair>
{
    template <typename FirstSource, typename SecondSource>
    static void selectSourcePair(FirstSource && first, SecondSource && second, bool all, ColumnUInt8 & result)
    {
        if (all)
        {
            arrayAllAny<true>(first, second, result);
            return;
        }

        arrayAllAny<false>(first, second, result);
    }
};
/// Forwards both sources, the `all` flag and the result column to ArrayHasSelectArraySourcePair::select.
void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result)
{
ArrayHasSelectArraySourcePair::select(first, second, all, result);
}
}

View File

@ -109,4 +109,26 @@ struct ArraySinkSourceSelector
}
};
/// CRTP helper that dispatches on a pair of array sources.
/// `Base` must provide a static selectSourcePair(first, second, args...).
/// NOTE(review): GetArraySourceSelector is not visible here; presumably it resolves the concrete type of its
/// first argument and calls Base::selectImpl with it — confirm.
template <typename Base>
struct ArraySourcePairSelector
{
/// Entry point: starts the selection with `first` in the leading position.
template <typename ... Args>
static void select(IArraySource & first, IArraySource & second, Args && ... args)
{
GetArraySourceSelector<Base>::select(first, second, args ...);
}
/// Intermediate step: `second` is still an IArraySource, so the two sources are swapped
/// and the selection is run again with `second` in the leading position.
template <typename FirstSource, typename ... Args>
static void selectImpl(FirstSource && first, IArraySource & second, Args && ... args)
{
GetArraySourceSelector<Base>::select(second, first, args ...);
}
/// Final step: the parameters arrive in swapped order (second, first); they are passed
/// to Base::selectSourcePair in the original order (first, second).
template <typename SecondSource, typename FirstSource, typename ... Args>
static void selectImpl(SecondSource && second, FirstSource && first, Args && ... args)
{
Base::selectSourcePair(first, second, args ...);
}
};
}