From a76bb9660c68cb2ddce987bc74340337b2d07b18 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Jan 2018 18:24:27 +0300 Subject: [PATCH] added functions arrayAny and arrayAll [#CLICKHOUSE-3504] --- dbms/src/Functions/CMakeLists.txt | 2 + dbms/src/Functions/FunctionsArray.cpp | 105 ++++++++++++++++- dbms/src/Functions/FunctionsArray.h | 42 +++++++ dbms/src/Functions/GatherUtils.h | 131 +++++++++++++++++++++ dbms/src/Functions/GatherUtils_has.cpp | 25 ++++ dbms/src/Functions/GatherUtils_selectors.h | 22 ++++ 6 files changed, 322 insertions(+), 5 deletions(-) create mode 100644 dbms/src/Functions/GatherUtils_has.cpp diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index eb326931243..9bfbed2ce09 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -62,6 +62,8 @@ generate_function_register(Array FunctionArrayPushFront FunctionArrayPopBack FunctionArrayPopFront + FunctionArrayHasAll + FunctionArrayHasAny ) diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index e132577a340..d8d3b4b482c 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB @@ -2494,12 +2495,15 @@ String FunctionArrayConcat::getName() const DataTypePtr FunctionArrayConcat::getReturnTypeImpl(const DataTypes & arguments) const { if (arguments.empty()) - throw Exception{"Function array requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception{"Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - auto array_type = typeid_cast(arguments[0].get()); - if (!array_type) - throw Exception("First argument for function " + getName() + " must be an array but it has type " - + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + for (auto i : ext::range(0, arguments.size())) + { + auto array_type = typeid_cast(arguments[i].get()); + if (!array_type) + throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " + + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } return getLeastCommonType(arguments); } @@ -2837,4 +2841,95 @@ FunctionPtr FunctionArrayPopBack::create(const Context &) return std::make_shared(); } + +/// Implementation of FunctionArrayAllAny. + +FunctionPtr FunctionArrayHasAll::create(const Context & context) +{ + return std::make_shared(context); +} + +FunctionPtr FunctionArrayHasAny::create(const Context & context) +{ + return std::make_shared(context); +} + + +DataTypePtr FunctionArrayHasAllAny::getReturnTypeImpl(const DataTypes & arguments) const +{ + for (auto i : ext::range(0, arguments.size())) + { + auto array_type = typeid_cast(arguments[i].get()); + if (!array_type) + throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " + + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return std::make_shared(); +} + +void FunctionArrayHasAllAny::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) +{ + size_t rows = block.rows(); + size_t num_args = arguments.size(); + + auto result_column = ColumnUInt8::create(rows); + + DataTypePtr common_type = nullptr; + auto commonType = [& common_type, & block, & arguments]() + { + if (common_type == nullptr) + { + DataTypes data_types; + data_types.reserve(arguments.size()); + for (const auto & argument : arguments) + data_types.push_back(block.getByPosition(argument).type); + + common_type = getLeastCommonType(data_types); + } + + return common_type; + }; + + Columns preprocessed_columns(num_args); + + for (size_t i = 0; i < num_args; ++i) + { + const auto & argument = block.getByPosition(arguments[i]); + ColumnPtr preprocessed_column = argument.column; + + const auto argument_type = typeid_cast(argument.type.get()); + const auto & nested_type = argument_type->getNestedType(); + + /// Converts Array(Nothing) or Array(Nullable(Nothing) to common type. Example: hasAll([Null, 1], [Null]) -> 1 + if (typeid_cast(removeNullable(nested_type).get())) + preprocessed_column = castColumn(argument, commonType(), context); + + preprocessed_columns[i] = std::move(preprocessed_column); + } + + std::vector> sources; + + for (auto & argument_column : preprocessed_columns) + { + bool is_const = false; + + if (auto argument_column_const = typeid_cast(argument_column.get())) + { + is_const = true; + argument_column = argument_column_const->getDataColumnPtr(); + } + + if (auto argument_column_array = typeid_cast(argument_column.get())) + sources.emplace_back(createArraySource(*argument_column_array, is_const, rows)); + else + throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; + } + + auto result_column_ptr = typeid_cast(result_column.get()); + sliceHas(*sources[0], *sources[1], all, *result_column_ptr); + + block.getByPosition(result).column = std::move(result_column); +} + } diff --git a/dbms/src/Functions/FunctionsArray.h b/dbms/src/Functions/FunctionsArray.h index ca7cfe8e457..b0256bc629f 100644 --- a/dbms/src/Functions/FunctionsArray.h +++ b/dbms/src/Functions/FunctionsArray.h @@ -1540,6 +1540,48 @@ public: FunctionArrayPopBack() : FunctionArrayPop(false, name) {} }; +class FunctionArrayHasAllAny : public IFunction +{ +public: + FunctionArrayHasAllAny(const Context & context, bool all, const char * name) + : context(context), all(all), name(name) {} + + String getName() const override { return name; } + + bool isVariadic() const override { return false; } + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; + + bool useDefaultImplementationForConstants() const override { return true; } + +private: + const Context & context; + bool all; + const char * name; +}; + +class FunctionArrayHasAll : public FunctionArrayHasAllAny +{ +public: + static constexpr auto name = "hasAll"; + + static FunctionPtr create(const Context & context); + + FunctionArrayHasAll(const Context & context) : FunctionArrayHasAllAny(context, true, name) {} +}; + +class FunctionArrayHasAny : public FunctionArrayHasAllAny +{ +public: + static constexpr auto name = "hasAny"; + + static FunctionPtr create(const Context & context); + + FunctionArrayHasAny(const Context & context) : FunctionArrayHasAllAny(context, false, name) {} +}; struct NameHas { static constexpr auto name = "has"; }; struct NameIndexOf { static constexpr auto name = "indexOf"; }; diff --git a/dbms/src/Functions/GatherUtils.h b/dbms/src/Functions/GatherUtils.h index cd5ca048353..ec9730487e5 100644 --- a/dbms/src/Functions/GatherUtils.h +++ b/dbms/src/Functions/GatherUtils.h @@ -1153,4 +1153,135 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con } } + +/// Methods to check if first array has elements from second array, overloaded for various combinations of types. + +template +bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second, + const UInt8 * first_null_map, const UInt8 * second_null_map) +{ + const bool has_first_null_map = first_null_map != nullptr; + const bool has_second_null_map = second_null_map != nullptr; + + for (size_t i = 0; i < second.size; ++i) + { + bool has = false; + for (size_t j = 0; j < first.size && !has; ++j) + { + const bool is_first_null = has_first_null_map && first_null_map[j]; + const bool is_second_null = has_second_null_map && second_null_map[i]; + + if (is_first_null && is_second_null) + has = true; + + if (!is_first_null && !is_second_null && isEqual(first, second, j, i)) + has = true; + } + + if (has && !all) + return true; + + if (!has && all) + return false; + + } + + return all; +} + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-compare" + +template +bool sliceEqualElements(const NumericArraySlice & first, const NumericArraySlice & second, size_t first_ind, size_t second_ind) +{ + return first.data[first_ind] == second.data[second_ind]; +} + +#pragma GCC diagnostic pop + +template +bool sliceEqualElements(const NumericArraySlice & first, const GenericArraySlice & second, size_t, size_t) +{ + return false; +} + +template +bool sliceEqualElements(const GenericArraySlice & first, const NumericArraySlice & second, size_t, size_t) +{ + return false; +} + +inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind) +{ + return first.elements->compareAt(first_ind + first.begin, second_ind + second.begin, *second.elements, -1) == 0; +} + +template +bool sliceHas(const NumericArraySlice & first, const NumericArraySlice & second) +{ + auto impl = sliceHasImpl, NumericArraySlice, sliceEqualElements>; + return impl(first, second, nullptr, nullptr); +} + +template +bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second) +{ + /// Generic arrays should have the same type in order to use column.compareAt(...) + if (typeid(*first.elements) != typeid(*second.elements)) + return false; + + auto impl = sliceHasImpl; + return impl(first, second, nullptr, nullptr); +} + +template +bool sliceHas(const GenericArraySlice & /*first*/, const NumericArraySlice & /*second*/) +{ + return false; +} + +template +bool sliceHas(const NumericArraySlice & /*first*/, const GenericArraySlice & /*second*/) +{ + return false; +} + +template +bool sliceHas(const FirstArraySlice & first, NullableArraySlice & second) +{ + auto impl = sliceHasImpl>; + return impl(first, second, nullptr, second.null_map); +} + +template +bool sliceHas(const NullableArraySlice & first, SecondArraySlice & second) +{ + auto impl = sliceHasImpl>; + return impl(first, second, first.null_map, nullptr); +} + +template +bool sliceHas(const NullableArraySlice & first, NullableArraySlice & second) +{ + auto impl = sliceHasImpl>; + return impl(first, second, first.null_map, second.null_map); +} + +template +void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnUInt8 & result) +{ + auto size = result.size(); + auto & data = result.getData(); + for (auto row : ext::range(0, size)) + { + data[row] = static_cast(sliceHas(first.getWhole(), second.getWhole()) ? 1 : 0); + first.next(); + second.next(); + } +} + +void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result); + } diff --git a/dbms/src/Functions/GatherUtils_has.cpp b/dbms/src/Functions/GatherUtils_has.cpp new file mode 100644 index 00000000000..73602acac72 --- /dev/null +++ b/dbms/src/Functions/GatherUtils_has.cpp @@ -0,0 +1,25 @@ +#include +#include + + +namespace DB +{ + +struct ArrayHasSelectArraySourcePair : public ArraySourcePairSelector +{ + template + static void selectSourcePair(FirstSource && first, SecondSource && second, bool all, ColumnUInt8 & result) + { + if (all) + arrayAllAny(first, second, result); + else + arrayAllAny(first, second, result); + } +}; + +void sliceHas(IArraySource & first, IArraySource & second, bool all, ColumnUInt8 & result) +{ + ArrayHasSelectArraySourcePair::select(first, second, all, result); +} + +} diff --git a/dbms/src/Functions/GatherUtils_selectors.h b/dbms/src/Functions/GatherUtils_selectors.h index 101544c5184..d1b844f69d6 100644 --- a/dbms/src/Functions/GatherUtils_selectors.h +++ b/dbms/src/Functions/GatherUtils_selectors.h @@ -109,4 +109,26 @@ struct ArraySinkSourceSelector } }; +template +struct ArraySourcePairSelector +{ + template + static void select(IArraySource & first, IArraySource & second, Args && ... args) + { + GetArraySourceSelector::select(first, second, args ...); + } + + template + static void selectImpl(FirstSource && first, IArraySource & second, Args && ... args) + { + GetArraySourceSelector::select(second, first, args ...); + } + + template + static void selectImpl(SecondSource && second, FirstSource && first, Args && ... args) + { + Base::selectSourcePair(first, second, args ...); + } +}; + }