From a5151fb49b6ec4dca9c38a154a47675541a33d54 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Sep 2021 17:52:08 +0300 Subject: [PATCH] Function accurateCastOrDefault updated implementation --- .../functions/date-time-functions.md | 2 +- .../functions/type-conversion-functions.md | 143 +++++++++++++++++- src/Functions/castOrDefault.cpp | 52 +++++-- .../extractTimeZoneFromFunctionArguments.cpp | 2 +- .../extractTimeZoneFromFunctionArguments.h | 2 + .../2026_accurate_cast_or_default.reference | 32 ++++ .../2026_accurate_cast_or_default.sql | 37 +++++ 7 files changed, 250 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/2026_accurate_cast_or_default.reference create mode 100644 tests/queries/0_stateless/2026_accurate_cast_or_default.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index db36a5b5637..f54ef635e0c 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -322,7 +322,7 @@ Truncates sub-seconds. **Syntax** ``` sql -toStartOfSecond(value[, timezone]) +toStartOfSecond(value, [timezone]) ``` **Arguments** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 40c864e6651..19fa41c56cb 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -92,7 +92,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrDefault {#toint8163264128256orDefault} -It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default nonnull value you input(default value must have the same type with result). +It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If failed, returns the default type value. **Example** @@ -161,6 +161,8 @@ Result: ## toFloat(32\|64)OrNull {#tofloat3264ornull} +## toFloat(32\|64)OrDefault {#tofloat3264ordefault} + ## toDate {#todate} Alias: `DATE`. @@ -169,23 +171,27 @@ Alias: `DATE`. ## toDateOrNull {#todateornull} +## toDateOrDefault {#todateordefault} + ## toDateTime {#todatetime} ## toDateTimeOrZero {#todatetimeorzero} ## toDateTimeOrNull {#todatetimeornull} +## toDateTimeOrDefault {#todatetimeordefault} + ## toDate32 {#todate32} Converts the argument to the [Date32](../../sql-reference/data-types/date32.md) data type. If the value is outside the range returns the border values supported by `Date32`. If the argument has [Date](../../sql-reference/data-types/date.md) type, borders of `Date` are taken into account. -**Syntax** +**Syntax** ``` sql toDate32(expr) ``` -**Arguments** +**Arguments** - `expr` — The value. [String](../../sql-reference/data-types/string.md), [UInt32](../../sql-reference/data-types/int-uint.md) or [Date](../../sql-reference/data-types/date.md). @@ -273,6 +279,26 @@ Result: └──────────────────────────────┴────────────────────┘ ``` +## toDate32OrDefault {#todate32-or-null} + +The same as [toDate32](#todate32) but returns default value if invalid argument is received. + +**Example** + +Query: + +``` sql +SELECT toDate32OrDefault('1955-01-01'), toDate32OrDefault(''); +``` + +Result: + +``` text +┌─toDate32OrDefault('1955-01-01')─┬─toDate32OrDefault('')─┐ +│ 1955-01-01 │ 1970-01-01 │ +└─────────────────────────────────┴───────────────────────┘ +``` + ## toDecimal(32\|64\|128\|256) {#todecimal3264128256} Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. @@ -335,6 +361,60 @@ Result: └──────┴────────────────────────────────────────────────────┘ ``` + +## toDecimal(32\|64\|128\|256)OrDefault {#todecimal3264128256ornull} + +Converts an input string to a [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type value. This family of functions include: + +- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. +- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. +- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. +- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. + +These functions should be used instead of `toDecimal*()` functions, if you prefer to get a default value instead of an exception in the event of an input value parsing error. + +**Arguments** + +- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. + +**Returned value** + +A value in the `Decimal(P,S)` data type. The value contains: + +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. + +**Examples** + +Query: + +``` sql +SELECT toDecimal32OrDefault(toString(-1.111), 5) AS val, toTypeName(val); +``` + +Result: + +``` text +┌────val─┬─toTypeName(toDecimal32OrDefault(toString(-1.111), 5))─┐ +│ -1.111 │ Decimal(9, 5) │ +└────────┴───────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT toDecimal32OrDefault(toString(-1.111), 2) AS val, toTypeName(val); +``` + +Result: + +``` text +┌─val─┬─toTypeName(toDecimal32OrDefault(toString(-1.111), 2))─┐ +│ 0 │ Decimal(9, 2) │ +└─────┴───────────────────────────────────────────────────────┘ +``` + ## toDecimal(32\|64\|128\|256)OrZero {#todecimal3264128256orzero} Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type. This family of functions include: @@ -774,6 +854,63 @@ Result: └───────┴──────┴──────────────┘ ``` + +## accurateCastOrDefault(x, T[, default_value]) {#type_conversion_function-accurate-cast_or_null} + +Converts input value `x` to the specified data type `T`. Returns default type value or `default_value` if specified if the casted value is not representable in the target type. + +**Syntax** + +```sql +accurateCastOrNull(x, T) +``` + +**Parameters** + +- `x` — Input value. +- `T` — The name of the returned data type. +- `default_value` - Default value of returned data type. + +**Returned value** + +- The value, converted to the specified data type `T`. + +**Example** + +Query: + +``` sql +SELECT toTypeName(accurateCastOrDefault(5, 'UInt8')); +``` + +Result: + +``` text +┌─toTypeName(accurateCastOrDefault(5, 'UInt8'))─┐ +│ UInt8 │ +└───────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT + accurateCastOrDefault(-1, 'UInt8') as uint8, + accurateCastOrDefault(-1, 'UInt8', 5) as uint8_default, + accurateCastOrDefault(128, 'Int8') as int8, + accurateCastOrDefault(128, 'Int8', 5) as int8_default, + accurateCastOrDefault('Test', 'FixedString(2)') as fixed_string, + accurateCastOrDefault('Test', 'FixedString(2)', 'Te') as fixed_string_default; +``` + +Result: + +``` text +┌─uint8─┬─uint8_default─┬─int8─┬─int8_default─┬─fixed_string─┬─fixed_string_default─┐ +│ 0 │ 5 │ 0 │ 5 │ │ Te │ +└───────┴───────────────┴──────┴──────────────┴──────────────┴──────────────────────┘ +``` + ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 20e001bcc29..ecc95f5265d 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -14,7 +15,7 @@ #include #include -#include +#include #include #include #include @@ -32,7 +33,7 @@ namespace ErrorCodes class FunctionCastOrDefault final : public IFunction { public: - static constexpr auto name = "castOrDefault"; + static constexpr auto name = "accurateCastOrDefault"; static FunctionPtr create(ContextPtr context) { @@ -42,7 +43,6 @@ public: explicit FunctionCastOrDefault(ContextPtr context_) : keep_nullable(context_->getSettingsRef().cast_keep_nullable) { - std::cerr << "FunctionCastOrDefault::constructor" << std::endl; } String getName() const override { return name; } @@ -52,7 +52,9 @@ public: ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool useDefaultImplementationForConstants() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { @@ -116,7 +118,9 @@ public: if (!is_current_index_null) continue; - result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); + if (i - start_insert_index > 0) + result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); + result->insertFrom(*default_column_casted, i); start_insert_index = i + 1; } @@ -129,7 +133,9 @@ public: if (!is_current_index_null) continue; - result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); + if (i - start_insert_index > 0) + result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); + result->insertDefault(); start_insert_index = i + 1; } @@ -167,11 +173,27 @@ private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool useDefaultImplementationForNulls() const override { return impl.useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return impl.useDefaultImplementationForLowCardinalityColumns();} + bool useDefaultImplementationForConstants() const override { return impl.useDefaultImplementationForConstants();} + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override + { + return impl.isSuitableForShortCircuitArgumentsExecution(arguments); + } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors optional_args; + + if constexpr (IsDataTypeDecimal) + mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"}); + + if (std::is_same_v || std::is_same_v) + optional_args.push_back({"timezone", &isString, &isColumnConst, "const String"}); + + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + size_t additional_argument_index = 1; size_t scale = 0; @@ -179,9 +201,6 @@ private: if constexpr (IsDataTypeDecimal) { - if (additional_argument_index < arguments.size()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} for decimal type requires additionae scale argument {}", getName()); - const auto & scale_argument = arguments[additional_argument_index]; WhichDataType scale_argument_type(scale_argument.type); @@ -200,10 +219,10 @@ private: if constexpr (std::is_same_v || std::is_same_v) { if (additional_argument_index < arguments.size()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} for DateTime or DateTime64 requires additional timezone argument {}", getName()); - - time_zone = extractTimeZoneNameFromFunctionArguments(arguments, additional_argument_index, 0); - ++additional_argument_index; + { + time_zone = extractTimeZoneNameFromColumn(*arguments[additional_argument_index].column); + ++additional_argument_index; + } } std::shared_ptr cast_type; @@ -283,6 +302,7 @@ struct NameToInt256OrDefault { static constexpr auto name = "toInt256OrDefault"; struct NameToFloat32OrDefault { static constexpr auto name = "toFloat32OrDefault"; }; struct NameToFloat64OrDefault { static constexpr auto name = "toFloat64OrDefault"; }; struct NameToDateOrDefault { static constexpr auto name = "toDateOrDefault"; }; +struct NameToDate32OrDefault { static constexpr auto name = "toDate32OrDefault"; }; struct NameToDateTimeOrDefault { static constexpr auto name = "toDateTimeOrDefault"; }; struct NameToDateTime64OrDefault { static constexpr auto name = "toDateTime64OrDefault"; }; struct NameToDecimal32OrDefault { static constexpr auto name = "toDecimal32OrDefault"; }; @@ -308,6 +328,7 @@ using FunctionToFloat32OrDefault = FunctionCastOrDefaultTyped; using FunctionToDateOrDefault = FunctionCastOrDefaultTyped; +using FunctionToDate32OrDefault = FunctionCastOrDefaultTyped; using FunctionToDateTimeOrDefault = FunctionCastOrDefaultTyped; using FunctionToDateTime64OrDefault = FunctionCastOrDefaultTyped; @@ -339,6 +360,7 @@ void registerFunctionCastOrDefault(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.cpp b/src/Functions/extractTimeZoneFromFunctionArguments.cpp index 19c02ed2390..9862c80f3c3 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.cpp +++ b/src/Functions/extractTimeZoneFromFunctionArguments.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes } -static std::string extractTimeZoneNameFromColumn(const IColumn & column) +std::string extractTimeZoneNameFromColumn(const IColumn & column) { const ColumnConst * time_zone_column = checkAndGetColumnConst(&column); diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.h b/src/Functions/extractTimeZoneFromFunctionArguments.h index 9dd3fdd4903..751d7a463f1 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.h +++ b/src/Functions/extractTimeZoneFromFunctionArguments.h @@ -12,6 +12,8 @@ namespace DB class Block; +std::string extractTimeZoneNameFromColumn(const IColumn & column); + /// Determine working timezone either from optional argument with time zone name or from time zone in DateTime type of argument. /// Returns empty string if default time zone should be used. std::string extractTimeZoneNameFromFunctionArguments( diff --git a/tests/queries/0_stateless/2026_accurate_cast_or_default.reference b/tests/queries/0_stateless/2026_accurate_cast_or_default.reference new file mode 100644 index 00000000000..a3da9ca532c --- /dev/null +++ b/tests/queries/0_stateless/2026_accurate_cast_or_default.reference @@ -0,0 +1,32 @@ +0 5 +5 +0 5 +0 5 +5 +0 5 +0 5 +5 +0 5 +0 5 +5 +0 5 +5 +0 5 +5 +0 0 +0 2 +1 +0 2 +\0\0 12 +0 5 +0 5 +0 5 +0 5 +0 5 +0 5 +0 5 +0 5 +0 5 +0 5 +127 127 +0 5 diff --git a/tests/queries/0_stateless/2026_accurate_cast_or_default.sql b/tests/queries/0_stateless/2026_accurate_cast_or_default.sql new file mode 100644 index 00000000000..eca7cea99ba --- /dev/null +++ b/tests/queries/0_stateless/2026_accurate_cast_or_default.sql @@ -0,0 +1,37 @@ +SELECT accurateCastOrDefault(-1, 'UInt8'), accurateCastOrDefault(5, 'UInt8'); +SELECT accurateCastOrDefault(5, 'UInt8'); +SELECT accurateCastOrDefault(257, 'UInt8'), accurateCastOrDefault(257, 'UInt8', 5); +SELECT accurateCastOrDefault(-1, 'UInt16'), accurateCastOrDefault(-1, 'UInt16', 5); +SELECT accurateCastOrDefault(5, 'UInt16'); +SELECT accurateCastOrDefault(65536, 'UInt16'), accurateCastOrDefault(65536, 'UInt16', 5); +SELECT accurateCastOrDefault(-1, 'UInt32'), accurateCastOrDefault(-1, 'UInt32', 5); +SELECT accurateCastOrDefault(5, 'UInt32'); +SELECT accurateCastOrDefault(4294967296, 'UInt32'), accurateCastOrDefault(4294967296, 'UInt32', 5); +SELECT accurateCastOrDefault(-1, 'UInt64'), accurateCastOrDefault(-1, 'UInt64', 5); +SELECT accurateCastOrDefault(5, 'UInt64'); +SELECT accurateCastOrDefault(-1, 'UInt256'), accurateCastOrDefault(-1, 'UInt256', 5); +SELECT accurateCastOrDefault(5, 'UInt256'); + +SELECT accurateCastOrDefault(-129, 'Int8'), accurateCastOrDefault(-129, 'Int8', 5); +SELECT accurateCastOrDefault(5, 'Int8'); +SELECT accurateCastOrDefault(128, 'Int8'), accurateCastOrDefault(128, 'Int8'); + +SELECT accurateCastOrDefault(10, 'Decimal32(9)'), accurateCastOrDefault(10, 'Decimal32(9)', 2); +SELECT accurateCastOrDefault(1, 'Decimal32(9)'); +SELECT accurateCastOrDefault(-10, 'Decimal32(9)'), accurateCastOrDefault(-10, 'Decimal32(9)', 2); + +SELECT accurateCastOrDefault('123', 'FixedString(2)'), accurateCastOrDefault('123', 'FixedString(2)', '12'); + +SELECT accurateCastOrDefault(inf, 'Int64'), accurateCastOrDefault(inf, 'Int64', 5); +SELECT accurateCastOrDefault(inf, 'Int128'), accurateCastOrDefault(inf, 'Int128', 5); +SELECT accurateCastOrDefault(inf, 'Int256'), accurateCastOrDefault(inf, 'Int256', 5); +SELECT accurateCastOrDefault(nan, 'Int64'), accurateCastOrDefault(nan, 'Int64', 5); +SELECT accurateCastOrDefault(nan, 'Int128'), accurateCastOrDefault(nan, 'Int128', 5); +SELECT accurateCastOrDefault(nan, 'Int256'), accurateCastOrDefault(nan, 'Int256', 5); + +SELECT accurateCastOrDefault(inf, 'UInt64'), accurateCastOrDefault(inf, 'UInt64', 5); +SELECT accurateCastOrDefault(inf, 'UInt256'), accurateCastOrDefault(inf, 'UInt256', 5); +SELECT accurateCastOrDefault(nan, 'UInt64'), accurateCastOrDefault(nan, 'UInt64', 5); +SELECT accurateCastOrDefault(nan, 'UInt256'), accurateCastOrDefault(nan, 'UInt256', 5); + +SELECT accurateCastOrDefault(number + 127, 'Int8') AS x, accurateCastOrDefault(number + 127, 'Int8', 5) AS x_with_default FROM numbers (2) ORDER BY number;