From e80082c24ff10cb9bca10a82f8cf64f322ab7e4c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 16 Dec 2024 10:12:12 +0000 Subject: [PATCH] Add compat setting for sane NULL behavior in functions 'least' and 'greatest' --- src/Core/Settings.cpp | 11 ++-- src/Core/SettingsChangesHistory.cpp | 1 + src/Functions/LeastGreatestGeneric.h | 57 ++++++++++++++----- ...reatest_ignore_null_input_values.reference | 23 ++++++++ ...east_greatest_ignore_null_input_values.sql | 34 +++++++++++ 5 files changed, 107 insertions(+), 19 deletions(-) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index e2c2f16ff0c..5cd523eb2cd 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -3147,16 +3147,19 @@ Possible values: Allow execute multiIf function columnar )", 0) \ DECLARE(Bool, formatdatetime_f_prints_single_zero, false, R"( -Formatter '%f' in function 'formatDateTime()' prints a single zero instead of six zeros if the formatted value has no fractional seconds. +Formatter '%f' in function 'formatDateTime' prints a single zero instead of six zeros if the formatted value has no fractional seconds. )", 0) \ DECLARE(Bool, formatdatetime_parsedatetime_m_is_month_name, true, R"( -Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' print/parse the month name instead of minutes. +Formatter '%M' in functions 'formatDateTime' and 'parseDateTime' print/parse the month name instead of minutes. )", 0) \ DECLARE(Bool, parsedatetime_parse_without_leading_zeros, true, R"( -Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros. +Formatters '%c', '%l' and '%k' in function 'parseDateTime' parse months and hours without leading zeros. )", 0) \ DECLARE(Bool, formatdatetime_format_without_leading_zeros, false, R"( -Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros. +Formatters '%c', '%l' and '%k' in function 'formatDateTime' print months and hours without leading zeros. +)", 0) \ + DECLARE(Bool, least_greatest_legacy_null_behavior, false, R"( +If enabled, functions 'least' and 'greatest' return NULL if one of their arguments is NULL. )", 0) \ \ DECLARE(UInt64, max_partitions_per_insert_block, 100, R"( diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 3ff4597755a..f42afabe63b 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,6 +79,7 @@ static std::initializer_list #include +#include #include #include +#include #include #include #include @@ -11,6 +13,11 @@ namespace DB { +namespace Setting +{ + extern const SettingsBool least_greatest_legacy_null_behavior; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -29,14 +36,21 @@ class FunctionLeastGreatestGeneric : public IFunction { public: static constexpr auto name = kind == LeastGreatest::Least ? "least" : "greatest"; - static FunctionPtr create(ContextPtr) { return std::make_shared>(); } + static FunctionPtr create(ContextPtr context) { return std::make_shared>(context); } + + /// TODO Remove support for legacy NULL behavior (can be done end of 2026) + + explicit FunctionLeastGreatestGeneric(ContextPtr context) + : legacy_null_behavior(context->getSettingsRef()[Setting::least_greatest_legacy_null_behavior]) + { + } private: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNulls() const override { return legacy_null_behavior; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & types) const override @@ -55,15 +69,15 @@ private: Columns converted_columns; for (const auto & argument : arguments) { - if (argument.type->onlyNull()) + if (!legacy_null_behavior && argument.type->onlyNull()) continue; /// ignore NULL arguments auto converted_col = castColumn(argument, result_type)->convertToFullColumnIfConst(); - converted_columns.emplace_back(converted_col); + converted_columns.push_back(converted_col); } - if (converted_columns.empty()) + if (!legacy_null_behavior && converted_columns.empty()) return arguments[0].column; - else if (converted_columns.size() == 1) + else if (!legacy_null_behavior && converted_columns.size() == 1) return converted_columns[0]; auto result_column = result_type->createColumn(); @@ -93,6 +107,8 @@ private: return result_column; } + + bool legacy_null_behavior; }; template @@ -100,18 +116,18 @@ class LeastGreatestOverloadResolver : public IFunctionOverloadResolver { public: static constexpr auto name = kind == LeastGreatest::Least ? "least" : "greatest"; + static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique>(context); } - static FunctionOverloadResolverPtr create(ContextPtr context) + explicit LeastGreatestOverloadResolver(ContextPtr context_) + : context(context_) + , legacy_null_behavior(context->getSettingsRef()[Setting::least_greatest_legacy_null_behavior]) { - return std::make_unique>(context); } - explicit LeastGreatestOverloadResolver(ContextPtr context_) : context(context_) {} - String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } - bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNulls() const override { return legacy_null_behavior; } FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { @@ -120,8 +136,13 @@ public: argument_types.push_back(argument.type); /// More efficient specialization for two numeric arguments. - if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type)) - return std::make_unique(SpecializedFunction::create(context), argument_types, return_type); + if (arguments.size() == 2) + { + auto arg_0_type = legacy_null_behavior ? removeNullable(arguments[0].type) : arguments[0].type; + auto arg_1_type = legacy_null_behavior ? removeNullable(arguments[1].type) : arguments[1].type; + if (isNumber(arg_0_type) && isNumber(arg_1_type)) + return std::make_unique(SpecializedFunction::create(context), argument_types, return_type); + } return std::make_unique( FunctionLeastGreatestGeneric::create(context), argument_types, return_type); @@ -132,14 +153,20 @@ public: if (types.empty()) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", getName()); - if (types.size() == 2 && isNumber(types[0]) && isNumber(types[1])) - return SpecializedFunction::create(context)->getReturnTypeImpl(types); + if (types.size() == 2) + { + auto arg_0_type = legacy_null_behavior ? removeNullable(types[0]) : types[0]; + auto arg_1_type = legacy_null_behavior ? removeNullable(types[1]) : types[1]; + if (isNumber(arg_0_type) && isNumber(arg_1_type)) + return SpecializedFunction::create(context)->getReturnTypeImpl(types); + } return getLeastSupertype(types); } private: ContextPtr context; + bool legacy_null_behavior; }; } diff --git a/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.reference b/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.reference index 6dbf5ace437..eb334900131 100644 --- a/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.reference +++ b/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.reference @@ -1,3 +1,4 @@ +Test with default NULL behavior Test with one const argument \N \N Test with two const arguments @@ -19,3 +20,25 @@ a a Special cases 2 1 1 1 +Repeat above tests with legacy NULL behavior +Test with one const argument +\N \N +Test with two const arguments +\N \N +\N \N +\N \N +\N \N +\N \N +\N \N +Test with one non-const argument +\N \N +Test with two non-const arguments +\N \N +\N \N +\N \N +\N \N +\N \N +\N \N +Special cases +2 1 +\N \N diff --git a/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.sql b/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.sql index 68578821c0c..8b936cce659 100644 --- a/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.sql +++ b/tests/queries/0_stateless/03174_least_greatest_ignore_null_input_values.sql @@ -1,5 +1,39 @@ -- Tests functions "greatest" and "least" with NULL arguments +SELECT 'Test with default NULL behavior'; +SET least_greatest_legacy_null_behavior = default; + +SELECT 'Test with one const argument'; +SELECT greatest(NULL), least(NULL); + +SELECT 'Test with two const arguments'; +SELECT greatest(1, NULL), least(1, NULL); +SELECT greatest(NULL, 1), least(NULL, 1); +SELECT greatest(NULL, 1.1), least(NULL, 1.1); +SELECT greatest(1.1, NULL), least(1.1, NULL); +SELECT greatest(NULL, 'a'), least(NULL, 'a'); +SELECT greatest('a', NULL), least('a', NULL); + +SELECT 'Test with one non-const argument'; +SELECT greatest(materialize(NULL)), least(materialize(NULL)); + +SELECT 'Test with two non-const arguments'; +SELECT greatest(materialize(1), NULL), least(materialize(1), NULL); +SELECT greatest(materialize(NULL), 1), least(materialize(NULL), 1); +SELECT greatest(materialize(NULL), 1.1), least(materialize(NULL), 1.1); +SELECT greatest(materialize(1.1), NULL), least(materialize(1.1), NULL); +SELECT greatest(materialize(NULL), 'a'), least(materialize(NULL), 'a'); +SELECT greatest(materialize('a'), NULL), least(materialize('a'), NULL); + +SELECT 'Special cases'; +SELECT greatest(toNullable(1), 2), least(toNullable(1), 2); +SELECT greatest(toLowCardinality(1), NULL), least(toLowCardinality(1), NULL); + +-- ---------------------------------------------------------------------------- + +SELECT 'Repeat above tests with legacy NULL behavior'; +SET least_greatest_legacy_null_behavior = true; + SELECT 'Test with one const argument'; SELECT greatest(NULL), least(NULL);