From 1df5c7cedf9e752911fb952c32588ca1c5bcd48d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Apr 2020 01:28:08 +0300 Subject: [PATCH 1/5] Added generic variants of least and greatest functions #4767 --- src/Columns/IColumn.h | 2 +- src/Functions/LeastGreatestGeneric.h | 136 +++++++++++++++++++++++++++ src/Functions/greatest.cpp | 4 +- src/Functions/least.cpp | 4 +- 4 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 src/Functions/LeastGreatestGeneric.h diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 090537d6770..4af593bb658 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -44,7 +44,7 @@ public: /// Name of a Column kind, without parameters (example: FixedString, Array). virtual const char * getFamilyName() const = 0; - /** If column isn't constant, returns nullptr (or itself). + /** If column isn't constant, returns itself. * If column is constant, transforms constant to full column (if column type allows such transform) and return it. */ virtual Ptr convertToFullColumnIfConst() const { return getPtr(); } diff --git a/src/Functions/LeastGreatestGeneric.h b/src/Functions/LeastGreatestGeneric.h new file mode 100644 index 00000000000..8e7eb555b3e --- /dev/null +++ b/src/Functions/LeastGreatestGeneric.h @@ -0,0 +1,136 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +enum class LeastGreatest +{ + Least, + Greatest +}; + + +template +class FunctionLeastGreatestGeneric : public IFunction +{ +public: + static constexpr auto name = kind == LeastGreatest::Least ? "least" : "greatest"; + static FunctionPtr create(const Context &) { return std::make_shared>(); } + +private: + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & types) const override + { + if (types.empty()) + throw Exception("Function " + getName() + " cannot be called without arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return getLeastSupertype(types); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + size_t num_arguments = arguments.size(); + if (1 == num_arguments) + { + block.getByPosition(result).column = block.getByPosition(arguments[0]).column; + return; + } + + auto result_type = block.getByPosition(result).type; + + Columns converted_columns(num_arguments); + for (size_t arg = 0; arg < num_arguments; ++arg) + converted_columns[arg] = castColumn(block.getByPosition(arguments[arg]), result_type)->convertToFullColumnIfConst(); + + auto result_column = result_type->createColumn(); + result_column->reserve(input_rows_count); + + for (size_t row_num = 0; row_num < input_rows_count; ++row_num) + { + size_t best_arg = 0; + for (size_t arg = 1; arg < num_arguments; ++arg) + { + auto cmp_result = converted_columns[arg]->compareAt(row_num, row_num, *converted_columns[best_arg], 1); + + if constexpr (kind == LeastGreatest::Least) + { + if (cmp_result < 0) + best_arg = arg; + } + else + { + if (cmp_result > 0) + best_arg = arg; + } + } + + result_column->insertFrom(*converted_columns[best_arg], row_num); + } + + block.getByPosition(result).column = std::move(result_column); + } +}; + + +template +class LeastGreatestOverloadResolver : public IFunctionOverloadResolverImpl +{ +public: + static constexpr auto name = kind == LeastGreatest::Least ? "least" : "greatest"; + + static FunctionOverloadResolverImplPtr create(const Context & context) + { + return std::make_unique>(context); + } + + explicit LeastGreatestOverloadResolver(const Context & context_) : context(context_) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + + FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + DataTypes argument_types; + + /// More efficient specialization for two numeric arguments. + if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type)) + return std::make_unique(SpecializedFunction::create(context), argument_types, return_type); + + return std::make_unique( + FunctionLeastGreatestGeneric::create(context), argument_types, return_type); + } + + DataTypePtr getReturnType(const DataTypes & types) const override + { + if (types.empty()) + throw Exception("Function " + getName() + " cannot be called without arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return getLeastSupertype(types); + } + +private: + const Context & context; +}; + +} + + diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 9abf85e751b..63f08d0affe 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -1,6 +1,8 @@ #include #include #include +#include + namespace DB { @@ -57,7 +59,7 @@ using FunctionGreatest = FunctionBinaryArithmetic; void registerFunctionGreatest(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f2e7c1f15d2..ba87e4bd7e4 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -1,6 +1,8 @@ #include #include #include +#include + namespace DB { @@ -57,7 +59,7 @@ using FunctionLeast = FunctionBinaryArithmetic; void registerFunctionLeast(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); } } From 5b5de975e1758374ac071003a505f934419cf92b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Apr 2020 01:38:27 +0300 Subject: [PATCH 2/5] Added a test #4767 --- .../01246_least_greatest_generic.reference | 22 ++++++++++++ .../01246_least_greatest_generic.sql | 34 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 tests/queries/0_stateless/01246_least_greatest_generic.reference create mode 100644 tests/queries/0_stateless/01246_least_greatest_generic.sql diff --git a/tests/queries/0_stateless/01246_least_greatest_generic.reference b/tests/queries/0_stateless/01246_least_greatest_generic.reference new file mode 100644 index 00000000000..24c2233eed2 --- /dev/null +++ b/tests/queries/0_stateless/01246_least_greatest_generic.reference @@ -0,0 +1,22 @@ +hello +world + +z +hello +world +1 +\N +\N +nan +inf +-0 +123 +-1 +4294967295 +['world'] +[[[]]] +[[[],[]]] +[] +[NULL] +[0] +[NULL] diff --git a/tests/queries/0_stateless/01246_least_greatest_generic.sql b/tests/queries/0_stateless/01246_least_greatest_generic.sql new file mode 100644 index 00000000000..74d095a66e7 --- /dev/null +++ b/tests/queries/0_stateless/01246_least_greatest_generic.sql @@ -0,0 +1,34 @@ +SELECT least('hello', 'world'); +SELECT greatest('hello', 'world'); +SELECT least('hello', 'world', ''); +SELECT greatest('hello', 'world', 'z'); + +SELECT least('hello'); +SELECT greatest('world'); + +SELECT least(1, inf, nan); +SELECT least(1, inf, nan, NULL); +SELECT greatest(1, inf, nan, NULL); +SELECT greatest(1, inf, nan); +SELECT greatest(1, inf); + +SELECT least(0., -0.); +SELECT least(toNullable(123), 456); + +SELECT LEAST(-1, 18446744073709551615); -- { serverError 386 } +SELECT LEAST(-1., 18446744073709551615); -- { serverError 386 } +SELECT LEAST(-1., 18446744073709551615.); +SELECT greatest(-1, 1, 4294967295); + +SELECT greatest([], ['hello'], ['world']); + +SELECT least([[[], []]], [[[]]], [[[]], [[]]]); +SELECT greatest([[[], []]], [[[]]], [[[]], [[]]]); + +SELECT least([], [NULL]); +SELECT greatest([], [NULL]); + +SELECT LEAST([NULL], [0]); +SELECT GREATEST([NULL], [0]); + +SELECT Greatest(); -- { serverError 42 } From 67790a74e54f7af93a55375aa978683194011241 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 17 Apr 2020 06:13:21 +0300 Subject: [PATCH 3/5] Fix test --- src/Functions/LeastGreatestGeneric.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Functions/LeastGreatestGeneric.h b/src/Functions/LeastGreatestGeneric.h index 8e7eb555b3e..2d6d71b20c7 100644 --- a/src/Functions/LeastGreatestGeneric.h +++ b/src/Functions/LeastGreatestGeneric.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -124,6 +125,9 @@ public: if (types.empty()) throw Exception("Function " + getName() + " cannot be called without arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (types.size() == 2 && isNumber(types[0]) && isNumber(types[1])) + return SpecializedFunction::create(context)->getReturnTypeImpl(types); + return getLeastSupertype(types); } From 71a7a74d259228e9794953eba6f31e0a4d570c1f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Apr 2020 15:31:59 +0300 Subject: [PATCH 4/5] Fix test --- tests/queries/0_stateless/01246_least_greatest_generic.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01246_least_greatest_generic.sql b/tests/queries/0_stateless/01246_least_greatest_generic.sql index 74d095a66e7..f0dceabfcb5 100644 --- a/tests/queries/0_stateless/01246_least_greatest_generic.sql +++ b/tests/queries/0_stateless/01246_least_greatest_generic.sql @@ -15,8 +15,10 @@ SELECT greatest(1, inf); SELECT least(0., -0.); SELECT least(toNullable(123), 456); -SELECT LEAST(-1, 18446744073709551615); -- { serverError 386 } -SELECT LEAST(-1., 18446744073709551615); -- { serverError 386 } +-- This can be improved +SELECT LEAST(-1, 18446744073709551615); -- { serverError 43 } +SELECT LEAST(-1., 18446744073709551615); -- { serverError 43 } + SELECT LEAST(-1., 18446744073709551615.); SELECT greatest(-1, 1, 4294967295); From 4f245dc2867a5557d8212eb9ff8cefff839c549f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 Apr 2020 15:35:00 +0300 Subject: [PATCH 5/5] Added performance test --- tests/performance/least_greatest_hits.xml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/performance/least_greatest_hits.xml diff --git a/tests/performance/least_greatest_hits.xml b/tests/performance/least_greatest_hits.xml new file mode 100644 index 00000000000..464656b0201 --- /dev/null +++ b/tests/performance/least_greatest_hits.xml @@ -0,0 +1,9 @@ + + + test.hits + + + SELECT count() FROM test.hits WHERE NOT ignore(least(URL, Referer)) + SELECT count() FROM test.hits WHERE NOT ignore(greatest(URL, Referer, Title)) + SELECT count() FROM test.hits WHERE NOT ignore(greatest(ClientIP, RemoteIP)) +