From cd94eda70457bac2e13534d8bf58d42c855af649 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 14 Nov 2023 13:25:47 +0000 Subject: [PATCH 01/79] Simplify --- src/Functions/toStartOfInterval.cpp | 130 +++++++++++++++++----------- 1 file changed, 78 insertions(+), 52 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 48bf88cb14c..0a3a409ea9f 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -324,38 +324,61 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - bool first_argument_is_date = false; + bool value_is_date = false; auto check_first_argument = [&] { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + const DataTypePtr & type_arg1 = arguments[0].type; + if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "Should be a date or a date with time", arguments[0].type->getName(), getName()); - first_argument_is_date = isDate(arguments[0].type); + "Should be a date or a date with time", type_arg1->getName(), getName()); + value_is_date = isDate(type_arg1); }; const DataTypeInterval * interval_type = nullptr; - bool result_type_is_date = false; - bool result_type_is_datetime = false; - auto check_interval_argument = [&] + enum class ResultType { - interval_type = checkAndGetDataType(arguments[1].type.get()); + Date, + DateTime, + DateTime64 + }; + ResultType result_type; + auto check_second_argument = [&] + { + const DataTypePtr & type_arg2 = arguments[1].type; + interval_type = checkAndGetDataType(type_arg2.get()); if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. 
" - "Should be an interval of time", arguments[1].type->getName(), getName()); - result_type_is_date = (interval_type->getKind() == IntervalKind::Year) - || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) - || (interval_type->getKind() == IntervalKind::Week); - result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour) - || (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second); + "Should be an interval of time", type_arg2->getName(), getName()); + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) + { + case IntervalKind::Nanosecond: + case IntervalKind::Microsecond: + case IntervalKind::Millisecond: + result_type = ResultType::DateTime64; + break; + case IntervalKind::Second: + case IntervalKind::Minute: + case IntervalKind::Hour: + case IntervalKind::Day: + result_type = ResultType::DateTime; + break; + case IntervalKind::Week: + case IntervalKind::Month: + case IntervalKind::Quarter: + case IntervalKind::Year: + result_type = ResultType::Date; + break; + } }; - auto check_timezone_argument = [&] + auto check_third_argument = [&] { - if (!WhichDataType(arguments[2].type).isString()) + const DataTypePtr & type_arg3 = arguments[2].type; + if (!isString(type_arg3)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. 
" "This argument is optional and must be a constant string with timezone name", - arguments[2].type->getName(), getName()); - if (first_argument_is_date && result_type_is_date) + type_arg3->getName(), getName()); + if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " "has the type DateTime or DateTime64", @@ -365,13 +388,13 @@ public: if (arguments.size() == 2) { check_first_argument(); - check_interval_argument(); + check_second_argument(); } else if (arguments.size() == 3) { check_first_argument(); - check_interval_argument(); - check_timezone_argument(); + check_second_argument(); + check_third_argument(); } else { @@ -380,24 +403,27 @@ public: getName(), arguments.size()); } - if (result_type_is_date) - return std::make_shared(); - else if (result_type_is_datetime) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); - else + switch (result_type) { - auto scale = 0; + case ResultType::Date: + return std::make_shared(); + case ResultType::DateTime: + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + case ResultType::DateTime64: + { + UInt32 scale = 0; + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; - if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; - else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; - else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + } } + 
std::unreachable(); } bool useDefaultImplementationForConstants() const override { return true; } @@ -426,34 +452,34 @@ private: ColumnPtr dispatchForColumns( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { - const auto & from_datatype = *time_column.type.get(); - const auto which_type = WhichDataType(from_datatype); + const auto & time_column_type = *time_column.type.get(); + const auto & time_column_col = *time_column.column.get(); - if (which_type.isDateTime64()) + if (isDateTime64(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); - auto scale = assert_cast(from_datatype).getScale(); + const auto * time_column_vec = checkAndGetColumn(time_column_col); + auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale); } - if (which_type.isDateTime()) + else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } - if (which_type.isDate()) + else if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, 
result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } - if (which_type.isDate32()) + else if (isDate32(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. " "Must contain dates or dates with time", getName()); @@ -502,7 +528,7 @@ private: return execute(from, time_column, num_units, result_type, time_zone, scale); } - UNREACHABLE(); + std::unreachable(); } template @@ -515,7 +541,7 @@ private: size_t size = time_data.size(); auto result_col = result_type->createColumn(); - auto *col_to = assert_cast(result_col.get()); + auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); result_data.resize(size); From aef1ea3df71d7d19c6267f6bb03ce994b7e14909 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 14 Nov 2023 17:49:08 +0000 Subject: [PATCH 02/79] Extend with origin argument, pt. 
I --- src/Functions/toStartOfInterval.cpp | 128 +++++++++++++++++++++------- 1 file changed, 95 insertions(+), 33 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 0a3a409ea9f..4d716637932 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -321,6 +321,11 @@ public: size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } + + bool hasInformationAboutMonotonicity() const override { return true; } + Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override { return { .is_monotonic = true, .is_always_monotonic = true }; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { @@ -329,7 +334,7 @@ public: { const DataTypePtr & type_arg1 = arguments[0].type; if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 1st argument of function {}. " "Should be a date or a date with time", type_arg1->getName(), getName()); value_is_date = isDate(type_arg1); }; @@ -347,7 +352,7 @@ public: const DataTypePtr & type_arg2 = arguments[1].type; interval_type = checkAndGetDataType(type_arg2.get()); if (!interval_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. 
" "Should be an interval of time", type_arg2->getName(), getName()); switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { @@ -371,18 +376,48 @@ public: } }; + enum class ThirdArgument + { + IsTimezone, + IsOrigin + }; + ThirdArgument third_argument; /// valid only if 3rd argument is given auto check_third_argument = [&] { const DataTypePtr & type_arg3 = arguments[2].type; - if (!isString(type_arg3)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "This argument is optional and must be a constant string with timezone name", + if (isString(type_arg3)) + { + third_argument = ThirdArgument::IsTimezone; + if (value_is_date && result_type == ResultType::Date) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + getName(), interval_type->getKind().toString()); + } + else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) + third_argument = ThirdArgument::IsOrigin; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " + "This argument is optional and must be a constant String with timezone name or a Date/Date32/DateTime/DateTime64 with a constant origin", type_arg3->getName(), getName()); + + }; + + auto check_fourth_argument = [&] + { + if (third_argument != ThirdArgument::IsOrigin) /// sanity check + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " + "The third argument must a Date/Date32/DateTime/DateTime64 with a constant origin", + arguments[2].type->getName(), getName()); + + const DataTypePtr & type_arg4 = arguments[3].type; + if (!isString(type_arg4)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 4th argument of function {}. 
" + "This argument is optional and must be a constant String with timezone name", + type_arg4->getName(), getName()); if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " - "has the type DateTime or DateTime64", - getName(), interval_type->getKind().toString()); + "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + getName(), interval_type->getKind().toString()); }; if (arguments.size() == 2) @@ -396,10 +431,17 @@ public: check_second_argument(); check_third_argument(); } + else if (arguments.size() == 4) + { + check_first_argument(); + check_second_argument(); + check_third_argument(); + check_fourth_argument(); + } else { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + "Number of arguments for function {} doesn't match: passed {}, should be 2, 3 or 4", getName(), arguments.size()); } @@ -408,7 +450,10 @@ public: case ResultType::Date: return std::make_shared(); case ResultType::DateTime: - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + { + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); + } case ResultType::DateTime64: { UInt32 scale = 0; @@ -419,7 +464,8 @@ public: else if (interval_type->getKind() == IntervalKind::Millisecond) scale = 3; - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 
2 : 3; + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); } } @@ -433,8 +479,16 @@ public: { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; - const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); + + ColumnWithTypeAndName origin_column; + const bool has_origin_arg = (arguments.size() == 3 && isDateOrDate32OrDateTimeOrDateTime64(arguments[2].type)) || arguments.size() == 4; + if (has_origin_arg) + origin_column = arguments[2]; + + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; + const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); + + auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); return result_column; } @@ -449,8 +503,8 @@ public: } private: - ColumnPtr dispatchForColumns( - const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const + ColumnPtr dispatchForTimeColumn( + const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { const auto & time_column_type = *time_column.type.get(); const auto & time_column_col = *time_column.column.get(); @@ -461,25 +515,25 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, 
scale); } else if (isDateTime(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. 
" "Must contain dates or dates with time", getName()); @@ -487,7 +541,7 @@ private: template ColumnPtr dispatchForIntervalColumn( - const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, + const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); @@ -505,34 +559,34 @@ private: switch (interval_type->getKind()) { case IntervalKind::Nanosecond: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Microsecond: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Millisecond: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Second: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Day: - return execute(from, time_column, num_units, 
result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Week: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Month: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Quarter: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Year: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); } - template - ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + template + ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, [[maybe_unused]] const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { using ToColumnType = typename ToDataType::ColumnType; using ToFieldType = typename ToDataType::FieldType; @@ -547,6 +601,14 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + /// TODO: This part is missing. origin_column is either {} (<-- to check, you could do `origin_column.column == nullptr`) or not {} + /// In the former case, we can execute below existing code. + /// In the latter case, we need to read the actual origin value. 
As per `getArgumentsThatAreAlwaysConstant()` (see above), we + /// can be sure that origin_column is a `ColumnConst`. The second assumption we can reasonable make is that it has the same + /// type (Date/Date32/DateTime/DateTime64) as the time column (1st argument). Since the method we are in is already + /// templatized on the data type of the time column, we can use `checkAndGetColumnConst(...)` to cast + /// `origin_column.column` to a const column and then read the (const) value from it, and proceed with the calculations. + for (size_t i = 0; i != size; ++i) result_data[i] = static_cast( Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); From 188c469318e8a7a7e339bcf4048952181e4e324c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 Nov 2023 14:22:39 +0000 Subject: [PATCH 03/79] done suggestings about logic --- src/Functions/toStartOfInterval.cpp | 341 +++------------------------- 1 file changed, 27 insertions(+), 314 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 4d716637932..29f8faa810c 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include #include @@ -21,300 +23,18 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; - extern const int DECIMAL_OVERFLOW; } namespace { - constexpr auto function_name = "toStartOfInterval"; - - template - struct Transform; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(DayNum(d), years); - } - - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); - } - - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, 
Int64) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); - } - - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); - } - }; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); - } - - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); - } - - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); - } - - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); - } - }; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(DayNum(d), months); - } - - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); - } - - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); - } - - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); - } - }; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(DayNum(d), weeks); - } - - 
static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); - } - - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); - } - - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); - } - - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfHourInterval(t, hours); - } - - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { 
throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMinuteInterval(t, minutes); - } - - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000) - { - Int64 t_milliseconds = 0; - if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_milliseconds / milliseconds * milliseconds; - else - return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; - } - else if (scale_multiplier > 1000) 
- { - Int64 scale_diff = scale_multiplier / static_cast(1000); - if (likely(t >= 0)) - return t / milliseconds / scale_diff * milliseconds; - else - return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; - } - else - if (likely(t >= 0)) - return t / milliseconds * milliseconds; - else - return ((t + 1) / milliseconds - 1) * milliseconds; - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000) - { - Int64 t_microseconds = 0; - if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_microseconds / microseconds * microseconds; - else - return ((t_microseconds + 1) / microseconds - 1) * microseconds; - } - else if (scale_multiplier > 1000000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000000); - if (likely(t >= 0)) - return t / microseconds / scale_diff * microseconds; - else - return ((t + 1) / microseconds / scale_diff - 1) * microseconds; - } - else - if (likely(t >= 0)) - return t / microseconds * microseconds; - else - return ((t + 1) / microseconds - 1) * microseconds; - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); 
} - - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000000) - { - Int64 t_nanoseconds = 0; - if (common::mulOverflow(t, (static_cast(1000000000) / scale_multiplier), t_nanoseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_nanoseconds / nanoseconds * nanoseconds; - else - return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; - } - else - if (likely(t >= 0)) - return t / nanoseconds * nanoseconds; - else - return ((t + 1) / nanoseconds - 1) * nanoseconds; - } - }; class FunctionToStartOfInterval : public IFunction { public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static constexpr auto name = function_name; + static constexpr auto name = "toStartOfInterval"; String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -472,9 +192,6 @@ public: std::unreachable(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { const auto & time_column = arguments[0]; @@ -485,23 +202,13 @@ public: if (has_origin_arg) origin_column = arguments[2]; - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; + const size_t time_zone_arg_num = (arguments.size() == 3 && isString(arguments[2].type)) ? 
2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); return result_column; } - bool hasInformationAboutMonotonicity() const override - { - return true; - } - - Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override - { - return { .is_monotonic = true, .is_always_monotonic = true }; - } - private: ColumnPtr dispatchForTimeColumn( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const @@ -535,11 +242,10 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. " - "Must contain dates or dates with time", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. 
Must contain dates or dates with time", getName()); } - template + template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const @@ -547,16 +253,16 @@ private: const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); + const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be a const interval of time.", - getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be a const interval of time.", getName()); + Int64 num_units = interval_column_const_int64->getValue(); if (num_units <= 0) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); - switch (interval_type->getKind()) + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); @@ -601,17 +307,24 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - /// TODO: This part is missing. origin_column is either {} (<-- to check, you could do `origin_column.column == nullptr`) or not {} - /// In the former case, we can execute below existing code. - /// In the latter case, we need to read the actual origin value. 
As per `getArgumentsThatAreAlwaysConstant()` (see above), we - /// can be sure that origin_column is a `ColumnConst`. The second assumption we can reasonable make is that it has the same - /// type (Date/Date32/DateTime/DateTime64) as the time column (1st argument). Since the method we are in is already - /// templatized on the data type of the time column, we can use `checkAndGetColumnConst(...)` to cast - /// `origin_column.column` to a const column and then read the (const) value from it, and proceed with the calculations. + if (origin_column.column == nullptr) + for (size_t i = 0; i != size; ++i) + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + else + { + UInt64 od = origin_column.column->get64(0); + + for (size_t i = 0; i != size; ++i) + { + auto td = time_data[i]; + if (od > size_t(td)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); + td -= od; + result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); - for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast( - Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] += scale_multiplier == 10 ? 
od : od / scale_multiplier; + } + } return result_col; } From 6605a375b642d0bc8b6ffccbd0400eb4ccee223e Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 Nov 2023 17:35:04 +0000 Subject: [PATCH 04/79] style check, docs, tests and modified logic --- .../functions/date-time-functions.md | 7 +- src/Functions/toStartOfInterval.cpp | 306 +++++++++++++++++- ...tart_of_interval_origin_overload.reference | 15 + ...6_to_start_of_interval_origin_overload.sql | 12 + 4 files changed, 335 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference create mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 0364a610404..3f522eeb164 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -590,7 +590,7 @@ Rounds down a date with time to the start of the ten-minute interval. Rounds down the date with time to the start of the fifteen-minute interval. -## toStartOfInterval(time_or_data, INTERVAL x unit \[, time_zone\]) +## toStartOfInterval(time_or_data, INTERVAL x unit \[, origin_time_or_data\] \[, time_zone\]) This is a generalization of other functions named `toStartOf*`. For example, `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, @@ -598,6 +598,11 @@ This is a generalization of other functions named `toStartOf*`. For example, `toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`, `toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)` etc. 
+Also it has an overload including `origin_time_or_data` column which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function: +``` SQL +SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), toIntervalMinute(1), toDateTime('2023-01-01 14:35:30')); +``` + ## toTime Converts a date with time to a certain fixed date, while preserving the time. diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 29f8faa810c..8529643ee70 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -23,11 +24,292 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int BAD_ARGUMENTS; } namespace { +constexpr auto function_name = "toStartOfInterval"; + +template +struct Transform; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfYearInterval(DayNum(d), years); + } + + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); + } + + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); + } + + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); + } +}; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); + } + + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + { + return 
time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); + } + + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); + } + + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); + } +}; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMonthInterval(DayNum(d), months); + } + + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); + } + + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); + } + + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + } +}; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfWeekInterval(DayNum(d), weeks); + } +static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); + } + + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); + } + + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16 d, Int64 days, 
const DateLUTImpl & time_zone, Int64) + { + return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); + } + + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) + { + return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); + } + + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) + { + return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); + } + + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfHourInterval(t, hours); + } + + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMinuteInterval(t, minutes); + } + + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { 
throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000) + { + Int64 t_milliseconds = 0; + if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); + if (likely(t >= 0)) + return t_milliseconds / milliseconds * milliseconds; + else + return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; + } + else if (scale_multiplier > 1000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000); + if (likely(t >= 0)) + return t / milliseconds / scale_diff * milliseconds; + else + return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; + } + else + if (likely(t >= 0)) + return t / milliseconds * milliseconds; + else + return ((t + 1) / milliseconds - 1) * milliseconds; + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { 
throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000) + { + Int64 t_microseconds = 0; + if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); + if (likely(t >= 0)) + return t_microseconds / microseconds * microseconds; + else + return ((t_microseconds + 1) / microseconds - 1) * microseconds; + } + else if (scale_multiplier > 1000000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000000); + if (likely(t >= 0)) + return t / microseconds / scale_diff * microseconds; + else + return ((t + 1) / microseconds / scale_diff - 1) * microseconds; + } + else + if (likely(t >= 0)) + return t / microseconds * microseconds; + else + return ((t + 1) / microseconds - 1) * microseconds; + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000000) + { + Int64 t_nanoseconds = 0; + if (common::mulOverflow(t, (static_cast(1000000000) / scale_multiplier), t_nanoseconds)) + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); + if (likely(t >= 0)) + return t_nanoseconds / nanoseconds * nanoseconds; + else + return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; + } + else + if (likely(t >= 0)) + return t / nanoseconds * nanoseconds; + else + return ((t + 1) / 
nanoseconds - 1) * nanoseconds; + } +}; class FunctionToStartOfInterval : public IFunction { @@ -218,6 +500,10 @@ private: if (isDateTime64(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDateTime64(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); auto scale = assert_cast(time_column_type).getScale(); @@ -226,18 +512,30 @@ private: } else if (isDateTime(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDateTime(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDate(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDate32(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, 
interval_column, origin_column, result_type, time_zone); @@ -292,7 +590,7 @@ private: } template - ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, [[maybe_unused]] const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { using ToColumnType = typename ToDataType::ColumnType; using ToFieldType = typename ToDataType::FieldType; @@ -309,18 +607,18 @@ private: if (origin_column.column == nullptr) for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); else { UInt64 od = origin_column.column->get64(0); - + for (size_t i = 0; i != size; ++i) { auto td = time_data[i]; if (od > size_t(td)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); td -= od; - result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(Transform::execute(td, num_units, time_zone, scale_multiplier)); result_data[i] += scale_multiplier == 10 ? 
od : od / scale_multiplier; } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference new file mode 100644 index 00000000000..7213925fb64 --- /dev/null +++ b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference @@ -0,0 +1,15 @@ +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-01-02 15:44:30 +2023-01-02 15:44:30 +2023-01-02 14:45:30 +2023-01-02 14:45:30 +2023-01-02 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql new file mode 100644 index 00000000000..ce4c8f87811 --- /dev/null +++ b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql @@ -0,0 +1,12 @@ +set session_timezone = 'UTC'; +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(number % 2 == 0 ? 
toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalMinute(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalWeek(1), toDate('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError 43 } +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError 43 } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError 36 } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? 
toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError 44 } From f596ae7a2c02e7915917daca82dad717a7df2640 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 18 Nov 2023 16:24:56 +0000 Subject: [PATCH 05/79] Fix style --- src/Functions/toStartOfInterval.cpp | 7 ++++--- utils/check-style/aspell-ignore/en/aspell-dict.txt | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 8529643ee70..10792922afb 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -20,11 +20,12 @@ namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; + extern const int DECIMAL_OVERFLOW; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 5d11185ff76..7eaafe8a777 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -261,6 +261,7 @@ FOSDEM FQDN Failover FarmHash +FileLog FilesystemCacheBytes FilesystemCacheElements FilesystemCacheFiles @@ -278,7 +279,6 @@ FilesystemMainPathTotalBytes FilesystemMainPathTotalINodes FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes -FileLog FixedString Flink ForEach @@ -571,13 +571,13 @@ NetworkSendPackets NodeJs NuRaft NumHexagons +NumPy NumToString NumToStringClassC NumberOfDatabases NumberOfDetachedByUserParts NumberOfDetachedParts NumberOfTables -NumPy OFNS OLAP OLTP @@ -588,10 +588,10 @@ OSGuestNiceTimeNormalized OSGuestTime OSGuestTimeCPU OSGuestTimeNormalized +OSIOWaitMicroseconds 
OSIOWaitTime OSIOWaitTimeCPU OSIOWaitTimeNormalized -OSIOWaitMicroseconds OSIdleTime OSIdleTimeCPU OSIdleTimeNormalized @@ -900,6 +900,7 @@ ThreadPoolRemoteFSReaderThreads ThreadPoolRemoteFSReaderThreadsActive ThreadsActive ThreadsInOvercommitTracker +TimescaleDB's Timeunit TinyLog Tkachenko @@ -1470,12 +1471,12 @@ fastops fcoverage fibonacci fifo +filelog filesystem filesystemAvailable filesystemCapacity filesystemFree filesystems -filelog finalizeAggregation fips firstLine From ea2ba82c1001febabdd8753246b64143e4bf1f6f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 18 Nov 2023 16:37:50 +0000 Subject: [PATCH 06/79] Update docs --- .../functions/date-time-functions.md | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 1291b570da4..989b39e46c1 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1345,7 +1345,7 @@ toStartOfTenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:20:00 Rounds down the date with time to the start of the fifteen-minute interval. -## toStartOfInterval(date_or_date_with_time, INTERVAL x unit \[, origin_time_or_data\] \[, time_zone\]) +## toStartOfInterval This function generalizes other `toStartOf*()` functions. For example, - `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, @@ -1372,21 +1372,21 @@ The calculation is performed relative to specific points in time: (*) hour intervals are special: the calculation is always performed relative to 00:00:00 (midnight) of the current day. As a result, only hour values between 1 and 23 are useful. 
-Also it has an overload including `origin_time_or_data` column which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function: -``` SQL -SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), toIntervalMinute(1), toDateTime('2023-01-01 14:35:30')); +**Syntax** + +```sql +toStartOfInterval(value, INTERVAL x unit[, time_zone]) +toStartOfInterval(value, INTERVAL x unit[, origin[, time_zone]]) ``` +The second overload emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function, e.g. + +``` SQL +SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30')); +``` **See Also** - - - [date_trunc](#date_trunc) -Also it has an overload including `origin_time_or_data` column which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function: -``` SQL -SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), toIntervalMinute(1), toDateTime('2023-01-01 14:35:30')); -``` - ## toTime Converts a date with time to a certain fixed date, while preserving the time. @@ -2462,19 +2462,29 @@ SELECT └──────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ ``` -## timeSlots(StartTime, Duration,\[, Size\]) +## timeSlots For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. This is necessary, for example, when searching for pageviews in the corresponding session. Accepts DateTime and DateTime64 as ’StartTime’ argument. For DateTime, ’Duration’ and ’Size’ arguments must be `UInt32`. For ’DateTime64’ they must be `Decimal64`. Returns an array of DateTime/DateTime64 (return type matches the type of ’StartTime’). 
For DateTime64, the return value's scale can differ from the scale of ’StartTime’ --- the highest scale among all given arguments is taken. -Example: +**Syntax** + +```sql +timeSlots(StartTime, Duration[, Size]) +``` + +**Example** + ```sql SELECT timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600)); SELECT timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299); SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0)); ``` + +Result: + ``` text ┌─timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600))─┐ │ ['2012-01-01 12:00:00','2012-01-01 12:30:00'] │ From 74449872180542ebdcc26e7410100b6d897efc65 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 20 Nov 2023 18:23:18 +0000 Subject: [PATCH 07/79] need to deal with week+ time intervals --- src/Functions/toStartOfInterval.cpp | 311 +----------------- ...to_start_of_interva_with_origin.reference} | 0 ...6_to_start_of_interval_origin_overload.sql | 12 - ...02916_to_start_of_interval_with_origin.sql | 42 +++ 4 files changed, 58 insertions(+), 307 deletions(-) rename tests/queries/0_stateless/{02916_to_start_of_interval_origin_overload.reference => 02916_to_start_of_interva_with_origin.reference} (100%) delete mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql create mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 10792922afb..fd472d453b5 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -20,297 +20,16 @@ namespace DB { namespace ErrorCodes { - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int BAD_ARGUMENTS; - extern const int DECIMAL_OVERFLOW; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int
ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int BAD_ARGUMENTS; } namespace { -constexpr auto function_name = "toStartOfInterval"; - -template -struct Transform; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(DayNum(d), years); - } - - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); - } - - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); - } - - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); - } -}; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); - } - - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); - } - - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); - } - - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); - } -}; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(DayNum(d), months); - } - - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & 
time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); - } - - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); - } - - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); - } -}; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(DayNum(d), weeks); - } -static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); - } - - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); - } - - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); - } - - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); - } -}; - -template <> -struct Transform -{ - static 
UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfHourInterval(t, hours); - } - - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMinuteInterval(t, minutes); - } - - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { 
throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000) - { - Int64 t_milliseconds = 0; - if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_milliseconds / milliseconds * milliseconds; - else - return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; - } - else if (scale_multiplier > 1000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000); - if (likely(t >= 0)) - return t / milliseconds / scale_diff * milliseconds; - else - return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; - } - else - if (likely(t >= 0)) - return t / milliseconds * milliseconds; - else - return ((t + 1) / milliseconds - 1) * milliseconds; - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000) - { - Int64 t_microseconds = 0; - if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_microseconds / microseconds * microseconds; - else - return ((t_microseconds + 1) / microseconds - 1) * microseconds; - } - else if (scale_multiplier > 1000000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000000); - 
if (likely(t >= 0)) - return t / microseconds / scale_diff * microseconds; - else - return ((t + 1) / microseconds / scale_diff - 1) * microseconds; - } - else - if (likely(t >= 0)) - return t / microseconds * microseconds; - else - return ((t + 1) / microseconds - 1) * microseconds; - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000000) - { - Int64 t_nanoseconds = 0; - if (common::mulOverflow(t, (static_cast(1000000000) / scale_multiplier), t_nanoseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_nanoseconds / nanoseconds * nanoseconds; - else - return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; - } - else - if (likely(t >= 0)) - return t / nanoseconds * nanoseconds; - else - return ((t + 1) / nanoseconds - 1) * nanoseconds; - } -}; class FunctionToStartOfInterval : public IFunction { @@ -485,7 +204,7 @@ public: if (has_origin_arg) origin_column = arguments[2]; - const size_t time_zone_arg_num = (arguments.size() == 3 && isString(arguments[2].type)) ? 2 : 3; + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 
2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); @@ -501,9 +220,8 @@ private: if (isDateTime64(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDateTime64(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + if (!isDateTime64(origin_column.type.get()) && origin_column.column != nullptr) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); auto scale = assert_cast(time_column_type).getScale(); @@ -608,20 +326,23 @@ private: if (origin_column.column == nullptr) for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); else { - UInt64 od = origin_column.column->get64(0); + UInt64 origin = origin_column.column->get64(0); + std::cerr << "origin: " << origin << std::endl; + std::cerr << "scale_multiplier: " << scale_multiplier << std::endl; for (size_t i = 0; i != size; ++i) { auto td = time_data[i]; - if (od > size_t(td)) + if (origin > size_t(td)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); - td -= od; - result_data[i] = static_cast(Transform::execute(td, num_units, time_zone, scale_multiplier)); - - result_data[i] += scale_multiplier == 10 ? 
od : od / scale_multiplier; + td -= origin; + result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) + origin = origin / scale_multiplier; + result_data[i] += origin; } } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference b/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference similarity index 100% rename from tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference rename to tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql deleted file mode 100644 index ce4c8f87811..00000000000 --- a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql +++ /dev/null @@ -1,12 +0,0 @@ -set session_timezone = 'UTC'; -SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(number % 2 == 0 ? 
toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalMinute(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalWeek(1), toDate('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError 43 } -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError 43 } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError 36 } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? 
toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError 44 } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql new file mode 100644 index 00000000000..5f5f941047a --- /dev/null +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -0,0 +1,42 @@ +set session_timezone = 'UTC'; + +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9)); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6)); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3)); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2)); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30')); + +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 
14:44:30.500600700', 9), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam'); + +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(number % 2 == 0 ? 
toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); + +SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalSecond(5), toDate32('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalMillisecond(12), toDateTime('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-01-02 14:45:50'), toIntervalHour(5), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError BAD_ARGUMENTS } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? 
toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError ILLEGAL_COLUMN } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 'Europe/Amsterdam', 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5, 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), materialize(toDateTime('2023-01-02 14:44:30')), 'Europe/Amsterdam'); -- { serverError ILLEGAL_COLUMN } From 8cd4d35ead1dc7ea5fff3a794522ffb5dbc75726 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:35:33 +0000 Subject: [PATCH 08/79] fixed review --- src/Functions/toStartOfInterval.cpp | 26 ++++++++------ ..._to_start_of_interva_with_origin.reference | 35 ++++++++++++++----- ...02916_to_start_of_interval_with_origin.sql | 18 +++++----- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index fd472d453b5..c4f60d6d6db 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,6 +1,8 @@ #include +#include "Common/IntervalKind.h" #include #include +#include "base/types.h" #include #include #include @@ -220,7 +222,7 @@ private: if (isDateTime64(time_column_type)) { - if (!isDateTime64(origin_column.type.get()) && origin_column.column != nullptr) + if (origin_column.column != nullptr && !isDateTime64(origin_column.type.get())) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const 
auto * time_column_vec = checkAndGetColumn(time_column_col); @@ -231,9 +233,8 @@ private: } else if (isDateTime(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDateTime(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + if (origin_column.column != nullptr && !isDateTime(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) @@ -241,9 +242,8 @@ private: } else if (isDate(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDate(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + if (origin_column.column != nullptr && !isDate(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) @@ -330,19 +330,23 @@ private: else { UInt64 origin = origin_column.column->get64(0); - std::cerr << "origin: " << origin << std::endl; - std::cerr << "scale_multiplier: " << scale_multiplier << std::endl; for (size_t i = 0; i != size; ++i) { auto td = time_data[i]; + result_data[i] = 0; if (origin > size_t(td)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); + td -= origin; - result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + auto res = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond 
|| unit == IntervalKind::Nanosecond) && scale_multiplier != 10) origin = origin / scale_multiplier; - result_data[i] += origin; + if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + result_data[i] = UInt16(origin/86400 + res); + else + result_data[i] += origin + res; } } diff --git a/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference index 7213925fb64..78e3b117cb8 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference @@ -1,15 +1,34 @@ +2023-01-02 14:45:50.917120700 +2023-01-02 14:45:50.910600 +2023-01-02 14:45:50.901 +2023-01-02 14:45:50 +2023-01-02 14:45:30 +2023-01-02 14:44:30 +2023-01-02 14:44:30 +2023-01-06 +2023-03-02 +2023-07-02 +2022-01-02 +2023-01-02 15:45:50.917120700 +2023-01-02 15:45:50.910600 +2023-01-02 15:45:50.901 +2023-01-02 15:45:50 +2023-01-02 15:45:30 +2023-01-02 15:44:30 +2023-01-03 14:44:30 +2023-01-06 +2023-03-02 +2023-07-02 +2022-01-02 2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 +2023-02-01 16:54:33 +2023-03-01 16:54:38 +2023-02-01 16:54:52 +2023-03-01 16:54:16 2023-03-01 16:55:00 2023-02-01 16:55:00 2023-03-01 16:55:00 2023-02-01 16:55:00 2023-03-01 16:55:00 2023-01-02 15:44:30 -2023-01-02 15:44:30 -2023-01-02 14:45:30 -2023-01-02 14:45:30 -2023-01-02 +2023-02-01 16:44:30 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 5f5f941047a..390b36a4cf1 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -7,10 +7,10 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), 
toIntervalSecond(2), SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2)); SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30')); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6), 'Europe/Amsterdam'); @@ -19,15 +19,15 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), 
toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30'), 'Europe/Amsterdam'); SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); SELECT toStartOfInterval(number % 2 == 0 ? 
toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalSecond(5), toDate32('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalMillisecond(12), toDateTime('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From b298bed1cfdb67e21da89e2b8a4bb4affc44f14b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:08:19 +0100 Subject: [PATCH 09/79] Rename 02916_to_start_of_interva_with_origin.reference to 02916_to_start_of_interval_with_origin.reference --- ...reference => 02916_to_start_of_interval_with_origin.reference} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02916_to_start_of_interva_with_origin.reference => 02916_to_start_of_interval_with_origin.reference} (100%) diff --git a/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference similarity index 100% rename from tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference rename to tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference From 450f609227fe0cedefbf2473a1a2c4b4e7b65d5f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 2 Dec 2023 22:58:52 
+0000 Subject: [PATCH 10/79] Various updates --- src/Functions/toStartOfInterval.cpp | 29 +++-- ...to_start_of_interval_with_origin.reference | 74 +++++++------ ...02916_to_start_of_interval_with_origin.sql | 103 ++++++++++++------ 3 files changed, 128 insertions(+), 78 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index c4f60d6d6db..ec0deee8abd 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,21 +1,21 @@ -#include #include "Common/IntervalKind.h" -#include -#include -#include "base/types.h" -#include -#include #include #include +#include +#include #include #include #include #include #include +#include #include #include +#include #include #include +#include +#include namespace DB @@ -325,26 +325,31 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); if (origin_column.column == nullptr) + { for (size_t i = 0; i != size; ++i) result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + } else { UInt64 origin = origin_column.column->get64(0); for (size_t i = 0; i != size; ++i) { - auto td = time_data[i]; - result_data[i] = 0; - if (origin > size_t(td)) + auto t = time_data[i]; + if (origin > static_cast(t)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); - td -= origin; - auto res = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + t -= origin; + auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_multiplier)); if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) origin = origin / scale_multiplier; + + static constexpr size_t SECONDS_PER_DAY = 86400; + + result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == 
IntervalKind::Year) - result_data[i] = UInt16(origin/86400 + res); + result_data[i] = static_cast(origin/SECONDS_PER_DAY + res); else result_data[i] += origin + res; } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 78e3b117cb8..3574da8f685 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -1,34 +1,40 @@ -2023-01-02 14:45:50.917120700 -2023-01-02 14:45:50.910600 -2023-01-02 14:45:50.901 -2023-01-02 14:45:50 -2023-01-02 14:45:30 -2023-01-02 14:44:30 -2023-01-02 14:44:30 -2023-01-06 -2023-03-02 -2023-07-02 -2022-01-02 -2023-01-02 15:45:50.917120700 -2023-01-02 15:45:50.910600 -2023-01-02 15:45:50.901 -2023-01-02 15:45:50 -2023-01-02 15:45:30 -2023-01-02 15:44:30 -2023-01-03 14:44:30 -2023-01-06 -2023-03-02 -2023-07-02 -2022-01-02 -2023-03-01 16:55:00 -2023-02-01 16:54:33 -2023-03-01 16:54:38 -2023-02-01 16:54:52 -2023-03-01 16:54:16 -2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 -2023-01-02 15:44:30 -2023-02-01 16:44:30 +-- Negative tests +Time and origin as Time +1971-01-01 +1971-07-01 +1970-02-01 +1970-01-05 +1970-01-02 05:27:18 +Time and origin as DateTime +2023-02-01 +2023-08-01 +2023-10-09 +2023-10-05 +2023-10-09 09:08:07 +2023-10-09 10:10:07 +2023-10-09 10:11:11 +2023-10-09 10:11:12 +Time and origin as DateTime64(9) +2023-02-01 +2023-08-01 +2023-10-09 +2023-10-05 +2023-10-09 09:08:07 +2023-10-09 10:10:07 +2023-10-09 10:11:11 +2023-10-09 10:11:12 +2299-12-31 23:57:37.653 +2299-12-31 23:42:04.320986 +2023-10-09 10:11:12.987654321 +Time and origin as DateTime64(3) +2023-02-01 +2023-08-01 +2023-10-09 +2023-10-05 +2023-10-09 09:08:07 +2023-10-09 10:10:07 +2023-10-09 10:11:11 +2023-10-09 10:11:12 +2023-10-09 10:11:12.987 +1970-01-20 15:20:47.136123 +1970-01-01 
00:28:17.710272123 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 390b36a4cf1..864ef56e7ea 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -1,42 +1,81 @@ set session_timezone = 'UTC'; -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9)); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6)); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3)); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2)); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30')); - -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9), 'Europe/Amsterdam'); -SELECT 
toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30'), 'Europe/Amsterdam'); - -SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(number % 2 == 0 ? 
toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT '-- Negative tests'; +-- time and origin arguments must have the same type SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalSecond(5), toDate32('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalMillisecond(12), toDateTime('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate32('2023-01-02 14:45:50'), toIntervalHour(5), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- the origin must be before the time SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError BAD_ARGUMENTS } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? 
toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError ILLEGAL_COLUMN } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 'Europe/Amsterdam', 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5, 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- the origin must be constant +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00')) from numbers(1); -- { serverError ILLEGAL_COLUMN } SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), materialize(toDateTime('2023-01-02 14:44:30')), 'Europe/Amsterdam'); -- { serverError ILLEGAL_COLUMN } + +-- with 4 arguments, the 3rd one must not be a string or an integer +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 'Europe/Amsterdam', 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5, 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- too many arguments +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT 'Time and origin as Time'; +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); 
-- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalWeek(1), toDate('2023-10-01')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalDay(1), toDate('2023-10-08')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalHour(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMinute(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalSecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMillisecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMicrosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalNanosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Time and origin as DateTime'; +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-02-01 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-08-01 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), toDateTime('2023-09-08 09:08:07')); -- broken, should that not return 2023-10-09 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); -- broken, should that not return 2023-10-05 09:08:07? 
+SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalDay(1), toDateTime('2023-10-08 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalHour(1), toDateTime('2023-10-09 09:10:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 10:10:11')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalSecond(1), toDateTime('2023-10-09 10:11:10')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMillisecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMicrosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalNanosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Time and origin as DateTime64(9)'; +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123456789', 9)); +SELECT 
toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) + +SELECT 'Time and origin as DateTime64(3)'; +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalSecond(1), toDateTime64('2023-10-09 
10:11:10.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) + +-- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +-- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +-- SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +-- SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); From 01036a21f59925b78f45415cacb1953481292e31 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:44:40 +0100 Subject: [PATCH 11/79] Fixed bugs regarding precision --- src/Functions/toStartOfInterval.cpp | 124 +++++++++++++++++++++------- 1 file changed, 95 insertions(+), 29 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index ec0deee8abd..da4eba9a594 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,8 +1,10 @@ -#include "Common/IntervalKind.h" +#include +#include #include #include #include #include +#include "DataTypes/IDataType.h" #include #include #include @@ -14,7 +16,6 @@ #include #include 
#include -#include #include @@ -116,9 +117,22 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); + + if (arguments[0].type.get() != arguments[2].type.get()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); } else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) + { third_argument = ThirdArgument::IsOrigin; + if (isDateTime64(arguments[0].type) && isDateTime64(arguments[2].type)) + result_type = ResultType::DateTime64; + else if (isDateTime(arguments[0].type) && isDateTime(arguments[2].type)) + result_type = ResultType::DateTime; + else if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && (isDate(arguments[2].type) || isDate32(arguments[2].type))) + result_type = ResultType::Date; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " "This argument is optional and must be a constant String with timezone name or a Date/Date32/DateTime/DateTime64 with a constant origin", @@ -180,13 +194,14 @@ public: } case ResultType::DateTime64: { - UInt32 scale = 0; + auto scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); + UInt32 scale = scale_date_time; if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; + scale = 9 > scale_date_time ? 9 : scale_date_time; else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; + scale = 6 > scale_date_time ? 6 : scale_date_time; else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; + scale = 3 > scale_date_time ? 
3 : scale_date_time; const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); @@ -209,11 +224,19 @@ public: const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); - auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + ColumnPtr result_column = nullptr; + if (isDateTime64(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else if (isDateTime(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + return result_column; } private: + template ColumnPtr dispatchForTimeColumn( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { @@ -229,7 +252,7 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } else if (isDateTime(time_column_type)) { @@ -238,7 +261,7 @@ private: const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), 
*time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate(time_column_type)) { @@ -247,7 +270,7 @@ private: const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { @@ -257,12 +280,12 @@ private: const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. 
Must contain dates or dates with time", getName()); } - template + template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const @@ -282,32 +305,52 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Microsecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Millisecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Second: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Day: - return execute(time_data_type, time_column, num_units, 
origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Week: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Month: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Quarter: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Year: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); } + template + Int64 decideScaleOnPrecision(const UInt16 scale) const + { + static constexpr Int64 MILLISECOND_SCALE = 1000; + static constexpr Int64 MICROSECOND_SCALE = 1000000; + static constexpr Int64 NANOSECOND_SCALE = 1000000000; + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + switch (unit) + { + case IntervalKind::Millisecond: + return MILLISECOND_SCALE; + case IntervalKind::Microsecond: + return MICROSECOND_SCALE; + case IntervalKind::Nanosecond: + return NANOSECOND_SCALE; + default: + return scale_multiplier; + } + } + template ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { @@ -323,6 +366,8 @@ private: result_data.resize(size); Int64 scale_multiplier = 
DecimalUtils::scaleMultiplier(scale); + Int64 scale_on_precision = decideScaleOnPrecision(scale); + Int64 scale_diff = scale_on_precision > scale_multiplier ? scale_on_precision / scale_multiplier : scale_multiplier / scale_on_precision; if (origin_column.column == nullptr) { @@ -342,19 +387,40 @@ private: t -= origin; auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_multiplier)); - if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) - origin = origin / scale_multiplier; - static constexpr size_t SECONDS_PER_DAY = 86400; result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) - result_data[i] = static_cast(origin/SECONDS_PER_DAY + res); + { + if (isDate(result_type) || isDate32(result_type)) + { + result_data[i] += origin + res; + } + else if (isDateTime64(result_type)) + { + result_data[i] += origin + (res * SECONDS_PER_DAY * scale_multiplier); + } + else + { + result_data[i] += origin + res * SECONDS_PER_DAY; + } + } else - result_data[i] += origin + res; + { + if (isDate(result_type) || isDate32(result_type)) + res = res / SECONDS_PER_DAY; + + if (scale_on_precision > scale_multiplier) + { + result_data[i] += (origin + res / scale_diff) * scale_diff; + } + else + { + result_data[i] += origin + res * scale_diff; + } + } } } - return result_col; } }; From 65f34394cf84fa12f6852e9988c8a3d62c155701 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:45:29 +0100 Subject: [PATCH 12/79] Update 02916_to_start_of_interval_with_origin.reference --- ...to_start_of_interval_with_origin.reference | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference 
b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 3574da8f685..806330743d7 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -1,40 +1,40 @@ -- Negative tests Time and origin as Time -1971-01-01 -1971-07-01 -1970-02-01 -1970-01-05 -1970-01-02 05:27:18 -Time and origin as DateTime 2023-02-01 2023-08-01 2023-10-09 2023-10-05 +2023-10-08 +Time and origin as DateTime +2023-02-01 09:08:07 +2023-08-01 09:08:07 +2023-10-09 09:08:07 +2023-10-05 09:08:07 2023-10-09 09:08:07 2023-10-09 10:10:07 2023-10-09 10:11:11 2023-10-09 10:11:12 Time and origin as DateTime64(9) -2023-02-01 -2023-08-01 -2023-10-09 -2023-10-05 -2023-10-09 09:08:07 -2023-10-09 10:10:07 -2023-10-09 10:11:11 -2023-10-09 10:11:12 -2299-12-31 23:57:37.653 -2299-12-31 23:42:04.320986 +2023-02-01 09:08:07.123456789 +2023-08-01 09:08:07.123456789 +2023-09-10 09:08:07.123456789 +2023-10-05 09:08:07.123456789 +2023-10-08 09:08:07.123543189 +2023-10-09 09:10:07.123460389 +2023-10-09 10:10:11.123456849 +2023-10-09 10:11:10.123456791 +2023-10-09 10:11:12.987456789 +2023-10-09 10:11:12.987653789 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) -2023-02-01 -2023-08-01 -2023-10-09 -2023-10-05 -2023-10-09 09:08:07 -2023-10-09 10:10:07 -2023-10-09 10:11:11 -2023-10-09 10:11:12 +2023-02-01 09:08:07.123 +2023-08-01 09:08:07.123 +2023-10-09 09:08:07.123 +2023-10-05 09:08:07.123 +2023-10-08 09:09:33.523 +2023-10-09 09:10:10.723 +2023-10-09 10:10:11.183 +2023-10-09 10:11:10.125 2023-10-09 10:11:12.987 -1970-01-20 15:20:47.136123 -1970-01-01 00:28:17.710272123 +2023-10-09 10:11:12.987000 +2023-10-09 10:11:12.987000000 From bd105d51ce469d9ed0c741a7a45012d506e34ddd Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:46:00 +0100 Subject: [PATCH 13/79] fixed tests --- 
...02916_to_start_of_interval_with_origin.sql | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 864ef56e7ea..853103ecc77 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -24,11 +24,11 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5 SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT 'Time and origin as Time'; -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalWeek(1), toDate('2023-10-01')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalDay(1), toDate('2023-10-08')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalWeek(1), toDate('2023-10-01')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalDay(1), toDate('2023-10-08')); SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalHour(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMinute(1), toDate('2023-10-09')); -- { serverError 
ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalSecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -37,10 +37,10 @@ SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMicrosecond(1), toDate( SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalNanosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 'Time and origin as DateTime'; -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-02-01 09:08:07? -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-08-01 09:08:07? -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), toDateTime('2023-09-08 09:08:07')); -- broken, should that not return 2023-10-09 09:08:07? -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); -- broken, should that not return 2023-10-05 09:08:07? 
+SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), toDateTime('2023-09-08 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalDay(1), toDateTime('2023-10-08 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalHour(1), toDateTime('2023-10-09 09:10:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 10:10:11')); @@ -50,30 +50,30 @@ SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMicrosecon SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalNanosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 'Time and origin as DateTime64(9)'; -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123456789', 9)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 
10:11:12.987654321', 9), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMonth(1), toDateTime64('2023-09-10 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123456789', 9)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); SELECT 'Time and origin as DateTime64(3)'; -SELECT 
toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123', 3)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) -SELECT 
toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); -- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); From 885e44c50e1968d660866e53f25110cb662e27a2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 18:17:41 +0100 Subject: [PATCH 14/79] style fix --- src/Functions/toStartOfInterval.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index da4eba9a594..4fae8cb6bb6 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -231,7 +231,6 @@ public: result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); else result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); - return result_column; } From e5fdad21609704baa3b90eb07161461120364fb2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:09:55 +0100 Subject: [PATCH 15/79] fix non-const arguments --- src/Functions/toStartOfInterval.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git 
a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 4fae8cb6bb6..94514d1e1a0 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -392,17 +392,11 @@ private: if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) { if (isDate(result_type) || isDate32(result_type)) - { result_data[i] += origin + res; - } else if (isDateTime64(result_type)) - { result_data[i] += origin + (res * SECONDS_PER_DAY * scale_multiplier); - } else - { result_data[i] += origin + res * SECONDS_PER_DAY; - } } else { @@ -410,13 +404,11 @@ private: res = res / SECONDS_PER_DAY; if (scale_on_precision > scale_multiplier) - { result_data[i] += (origin + res / scale_diff) * scale_diff; - } + else if (scale_on_precision == scale_multiplier && scale_on_precision != 10) /// scale == 10 is default case + result_data[i] += origin + (res * scale_on_precision); else - { result_data[i] += origin + res * scale_diff; - } } } } From ac8d80da515144d2d4fe3ed0c47e2bab1d171927 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:28:06 +0100 Subject: [PATCH 16/79] fix --- src/Functions/toStartOfInterval.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 94514d1e1a0..e67d3378f6e 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -405,7 +405,8 @@ private: if (scale_on_precision > scale_multiplier) result_data[i] += (origin + res / scale_diff) * scale_diff; - else if (scale_on_precision == scale_multiplier && scale_on_precision != 10) /// scale == 10 is default case + else if (scale_on_precision == scale_multiplier && scale_on_precision % 1000 != 0 && scale_multiplier != 10) /// when it's not a default case with DateTime + /// and when precision is not sub-scale 
result_data[i] += origin + (res * scale_on_precision); else result_data[i] += origin + res * scale_diff; From 7be47eca5e3ee31ef958f5b9e9804ceb15f35f48 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:28:27 +0100 Subject: [PATCH 17/79] Update 02916_to_start_of_interval_with_origin.reference --- ...02916_to_start_of_interval_with_origin.reference | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 806330743d7..870853bc371 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -38,3 +38,16 @@ Time and origin as DateTime64(3) 2023-10-09 10:11:12.987 2023-10-09 10:11:12.987000 2023-10-09 10:11:12.987000000 +Non-const arguments +2023-03-01 16:55:00.00 +2023-02-01 16:55:00.00 +2023-03-01 16:55:00.00 +2023-02-01 16:55:00.00 +2023-03-01 16:55:00.00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-01-02 15:44:30 +2023-02-01 16:44:30.00 From 969c7f36a5db55ef53cbce2def91820b619c23ea Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:28:45 +0100 Subject: [PATCH 18/79] fix tests --- .../02916_to_start_of_interval_with_origin.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 853103ecc77..71f5fb7fb36 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -75,7 +75,8 @@ SELECT toStartOfInterval(toDateTime64('2023-10-09 
10:11:12.987', 3), toIntervalM SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); --- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); --- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); --- SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); --- SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT 'Non-const arguments'; +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(number % 2 == 0 ? 
toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); From 3027f3a04f99beafdc8ead3641aa84fdf4f6657a Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 7 Dec 2023 15:29:11 +0000 Subject: [PATCH 19/79] fix tests --- src/Functions/toStartOfInterval.cpp | 23 ++++++++++--------- .../02207_subseconds_intervals.reference | 16 ++++++------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e67d3378f6e..7c75d7aa8e9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -117,9 +117,6 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); - - if (arguments[0].type.get() != arguments[2].type.get()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); } else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) { @@ -137,7 +134,6 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. 
" "This argument is optional and must be a constant String with timezone name or a Date/Date32/DateTime/DateTime64 with a constant origin", type_arg3->getName(), getName()); - }; auto check_fourth_argument = [&] @@ -365,13 +361,19 @@ private: result_data.resize(size); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - Int64 scale_on_precision = decideScaleOnPrecision(scale); - Int64 scale_diff = scale_on_precision > scale_multiplier ? scale_on_precision / scale_multiplier : scale_multiplier / scale_on_precision; + Int64 scale_on_interval = decideScaleOnPrecision(scale); + Int64 scale_diff = scale_on_interval > scale_multiplier ? scale_on_interval / scale_multiplier : scale_multiplier / scale_on_interval; if (origin_column.column == nullptr) { for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + { + result_data[i] = 0; + if (scale_on_interval < scale_multiplier) + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)) * scale_diff; + else + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + } } else { @@ -403,11 +405,10 @@ private: if (isDate(result_type) || isDate32(result_type)) res = res / SECONDS_PER_DAY; - if (scale_on_precision > scale_multiplier) + if (scale_on_interval > scale_multiplier) result_data[i] += (origin + res / scale_diff) * scale_diff; - else if (scale_on_precision == scale_multiplier && scale_on_precision % 1000 != 0 && scale_multiplier != 10) /// when it's not a default case with DateTime - /// and when precision is not sub-scale - result_data[i] += origin + (res * scale_on_precision); + else if (scale_on_interval == scale_multiplier && scale_on_interval % 1000 != 0 && scale_multiplier != 10) + result_data[i] += origin + (res * scale_on_interval); else result_data[i] += origin + res * scale_diff; } diff --git 
a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 91f0ecb8606..b0edbda5e76 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.123456 -1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.12345600 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.12345600 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.12345600 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123 -1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123000 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123 +1930-12-12 12:12:12.123000 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123 +2220-12-12 12:12:12.123000 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 From d08fd931bec203f1b86d89227b766973ad3612c9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 13:42:00 +0000 Subject: [PATCH 20/79] fix error --- src/Functions/toStartOfInterval.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 7c75d7aa8e9..e0301f45ded 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -190,8 +190,10 @@ public: } case ResultType::DateTime64: { - auto scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); - UInt32 scale = scale_date_time; + UInt32 scale = 0; + auto scale_date_time = 0; + if (third_argument == ThirdArgument::IsOrigin) + 
scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) scale = 9 > scale_date_time ? 9 : scale_date_time; else if (interval_type->getKind() == IntervalKind::Microsecond) From e13eec9c5a9cb49004aef8d635034e9ccfd697d4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 15:07:39 +0000 Subject: [PATCH 21/79] fix --- src/Functions/toStartOfInterval.cpp | 11 +++++------ .../02207_subseconds_intervals.reference | 16 ++++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e0301f45ded..e90f965a6f9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -191,15 +191,14 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - auto scale_date_time = 0; - if (third_argument == ThirdArgument::IsOrigin) - scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); + if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) + scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9 > scale_date_time ? 9 : scale_date_time; + scale = 9 > scale ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6 > scale_date_time ? 6 : scale_date_time; + scale = 6 > scale ? 6 : scale; else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3 > scale_date_time ? 3 : scale_date_time; + scale = 3 > scale ? 3 : scale; const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 
2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index b0edbda5e76..91f0ecb8606 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.12345600 -1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123456 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.12345600 +1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.12345600 +2220-12-12 12:12:12.123456 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123000 -1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123000 +1930-12-12 12:12:12.123 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123000 +2220-12-12 12:12:12.123 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 From 4c83b7e46ffe50409aab35b889839fb6cb92a18e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 15:18:43 +0000 Subject: [PATCH 22/79] style fix --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e90f965a6f9..b55d92f809b 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -191,7 +191,7 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || 
isDateTime64(arguments[0].type)) + if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) scale = 9 > scale ? 9 : scale; From 5793725bc7ae8cfd66bea9c9f82ef0e30c124314 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 16:45:45 +0000 Subject: [PATCH 23/79] tests --- .../02207_subseconds_intervals.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 91f0ecb8606..bedd5d4878b 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.123456 -1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.12345600 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.12345600 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.12345600 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123 -1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123000 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123 +1930-12-12 12:12:12.123000 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123 +2220-12-12 12:12:12.123000 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 @@ -75,4 +75,4 @@ test subtract[...]seconds() 2022-12-31 23:59:59.999 2022-12-31 23:59:59.900 2023-01-01 00:00:00.001 -2023-01-01 00:00:00.100 +2023-01-01 00:00:00.100 \ No 
newline at end of file From 53ef9c0cb80c734caa449430ff4600f32b900134 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 18:38:25 +0100 Subject: [PATCH 24/79] Update 02207_subseconds_intervals.reference --- tests/queries/0_stateless/02207_subseconds_intervals.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index bedd5d4878b..b0edbda5e76 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -75,4 +75,4 @@ test subtract[...]seconds() 2022-12-31 23:59:59.999 2022-12-31 23:59:59.900 2023-01-01 00:00:00.001 -2023-01-01 00:00:00.100 \ No newline at end of file +2023-01-01 00:00:00.100 From 87bda03da17cdf0e3878a1df73175c92bae834c6 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 21:36:24 +0100 Subject: [PATCH 25/79] logical error --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b55d92f809b..b6a3a9389d6 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -191,7 +191,7 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) + if (isDate32(arguments[0].type) || isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) scale = 9 > scale ? 
9 : scale; From d1c49cc9bcb869030361821e60349e8054a51c4b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:55:53 +0100 Subject: [PATCH 26/79] Added comments, simplified and fixed review --- src/Functions/toStartOfInterval.cpp | 138 ++++++++++++++++------------ 1 file changed, 81 insertions(+), 57 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b6a3a9389d6..6c71b357590 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,9 +1,7 @@ -#include -#include #include #include #include -#include +#include #include "DataTypes/IDataType.h" #include #include @@ -16,7 +14,7 @@ #include #include #include -#include +#include namespace DB @@ -31,21 +29,23 @@ namespace ErrorCodes } -namespace -{ - class FunctionToStartOfInterval : public IFunction { public: + enum class Overload + { + Default, /// toStartOfInterval(time, interval) or toStartOfInterval(time, interval, timezone) + Origin /// toStartOfInterval(time, interval, origin) or toStartOfInterval(time, interval, origin, timezone) + }; + mutable Overload overload; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } static constexpr auto name = "toStartOfInterval"; String getName() const override { return name; } - bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } @@ -59,8 +59,9 @@ public: { const DataTypePtr & type_arg1 = arguments[0].type; if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 
1st argument of function {}. " - "Should be a date or a date with time", type_arg1->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of 1st argument of function {}, expected a Date, DateTime or DateTime64", + type_arg1->getName(), getName()); value_is_date = isDate(type_arg1); }; @@ -75,10 +76,14 @@ public: auto check_second_argument = [&] { const DataTypePtr & type_arg2 = arguments[1].type; + interval_type = checkAndGetDataType(type_arg2.get()); if (!interval_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. " - "Should be an interval of time", type_arg2->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of 2nd argument of function {}, expected a time interval", + type_arg2->getName(), getName()); + + /// Result here is determined for default overload (without origin) switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: @@ -89,7 +94,7 @@ public: case IntervalKind::Second: case IntervalKind::Minute: case IntervalKind::Hour: - case IntervalKind::Day: + case IntervalKind::Day: /// weird why Day leads to DateTime but too afraid to change it result_type = ResultType::DateTime; break; case IntervalKind::Week: @@ -101,31 +106,26 @@ public: } }; - enum class ThirdArgument - { - IsTimezone, - IsOrigin - }; - ThirdArgument third_argument; /// valid only if 3rd argument is given auto check_third_argument = [&] { const DataTypePtr & type_arg3 = arguments[2].type; if (isString(type_arg3)) { - third_argument = ThirdArgument::IsTimezone; + overload = Overload::Default; + if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); } - 
else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) + else if (isDateTimeOrDateTime64(type_arg3) || isDate(type_arg3)) { - third_argument = ThirdArgument::IsOrigin; + overload = Overload::Origin; if (isDateTime64(arguments[0].type) && isDateTime64(arguments[2].type)) result_type = ResultType::DateTime64; else if (isDateTime(arguments[0].type) && isDateTime(arguments[2].type)) result_type = ResultType::DateTime; - else if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && (isDate(arguments[2].type) || isDate32(arguments[2].type))) + else if (isDate(arguments[0].type) && isDate(arguments[2].type)) result_type = ResultType::Date; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); @@ -138,7 +138,7 @@ public: auto check_fourth_argument = [&] { - if (third_argument != ThirdArgument::IsOrigin) /// sanity check + if (overload != Overload::Origin) /// sanity check throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " "The third argument must a Date/Date32/DateTime/DateTime64 with a constant origin", arguments[2].type->getName(), getName()); @@ -185,7 +185,7 @@ public: return std::make_shared(); case ResultType::DateTime: { - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); } case ResultType::DateTime64: @@ -200,7 +200,7 @@ public: else if (interval_type->getKind() == IntervalKind::Millisecond) scale = 3 > scale ? 3 : scale; - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + const size_t time_zone_arg_num = (overload == Overload::Default) ? 
2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); } } @@ -278,25 +278,25 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. Must contain dates or dates with time", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName()); } template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, - const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName()); const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be a const interval of time.", getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a const time interval", getName()); - Int64 num_units = interval_column_const_int64->getValue(); + const Int64 num_units = interval_column_const_int64->getValue(); if (num_units <= 0) - throw 
Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for 2nd argument of function {} must be positive", getName()); switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { @@ -328,12 +328,11 @@ private: } template - Int64 decideScaleOnPrecision(const UInt16 scale) const + Int64 decideScaleOnPrecision() const { static constexpr Int64 MILLISECOND_SCALE = 1000; static constexpr Int64 MICROSECOND_SCALE = 1000000; static constexpr Int64 NANOSECOND_SCALE = 1000000000; - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); switch (unit) { case IntervalKind::Millisecond: @@ -343,37 +342,41 @@ private: case IntervalKind::Nanosecond: return NANOSECOND_SCALE; default: - return scale_multiplier; + return 1; } } - template + template ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { - using ToColumnType = typename ToDataType::ColumnType; - using ToFieldType = typename ToDataType::FieldType; + using ResultColumnType = typename ResultDataType::ColumnType; + using ResultFieldType = typename ResultDataType::FieldType; const auto & time_data = time_column_type.getData(); size_t size = time_data.size(); auto result_col = result_type->createColumn(); - auto * col_to = assert_cast(result_col.get()); + auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); result_data.resize(size); - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - Int64 scale_on_interval = decideScaleOnPrecision(scale); - Int64 scale_diff = scale_on_interval > scale_multiplier ? 
scale_on_interval / scale_multiplier : scale_multiplier / scale_on_interval; + Int64 scale_on_time = DecimalUtils::scaleMultiplier(scale); // scale that depends on type of arguments + Int64 scale_on_interval = decideScaleOnPrecision(); // scale that depends on the Interval + /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them + /// to get the right precision for the result. + Int64 scale_diff = scale_on_interval > scale_on_time ? scale_on_interval / scale_on_time : scale_on_time / scale_on_interval; if (origin_column.column == nullptr) { for (size_t i = 0; i != size; ++i) { result_data[i] = 0; - if (scale_on_interval < scale_multiplier) - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)) * scale_diff; + if (scale_on_interval < scale_on_time) + /// if we have a time argument that has bigger scale than the interval can contain, we need + /// to return a value with bigger precision and thus we should multiply result on the scale difference. 
+ result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_interval)) * scale_diff; else - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); } } else @@ -387,31 +390,54 @@ private: throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); t -= origin; - auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_multiplier)); + auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_on_time)); static constexpr size_t SECONDS_PER_DAY = 86400; result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) { + /// By default, when we use week, month, quarter or year interval, we get date return type. So, simply add values. if (isDate(result_type) || isDate32(result_type)) result_data[i] += origin + res; - else if (isDateTime64(result_type)) - result_data[i] += origin + (res * SECONDS_PER_DAY * scale_multiplier); - else + /// When we use DateTime arguments, we should keep in mind that we also have hours, minutes and seconds there, + /// so we need to multiply result by amount of seconds per day. + else if (isDateTime(result_type)) result_data[i] += origin + res * SECONDS_PER_DAY; + /// When we use DateTime64 arguments, we also should multiply it on right scale. + else + result_data[i] += origin + (res * SECONDS_PER_DAY * scale_on_time); } else { + /// In this case result will be calculated as datetime, so we need to get the amount of days if the arguments are Date. 
if (isDate(result_type) || isDate32(result_type)) res = res / SECONDS_PER_DAY; - if (scale_on_interval > scale_multiplier) - result_data[i] += (origin + res / scale_diff) * scale_diff; - else if (scale_on_interval == scale_multiplier && scale_on_interval % 1000 != 0 && scale_multiplier != 10) - result_data[i] += origin + (res * scale_on_interval); + /// Case when Interval has default scale + if (scale_on_interval == 1) + { + /// Case when the arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. + if (scale_on_time % 1000 != 0 && scale_on_time >= 1000) + result_data[i] += (origin + res / scale_on_time) * scale_on_time; + /// Special case when the arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't + else if (scale_on_time == 100) + result_data[i] += (origin + res * scale_on_time); + /// Cases when precision of DateTime64 is 1, 3, 6, 9 e.g. has right precision in res and origin. + else + result_data[i] += (origin + res); + } + /// Case when Interval has some specific scale (3,6,9) else - result_data[i] += origin + res * scale_diff; + { + /// If we have a time argument that has bigger scale than the interval can contain, we need + /// to return a value with bigger precision and thus we should multiply result on the scale difference. 
+ if (scale_on_interval < scale_on_time) + result_data[i] += origin + res * scale_diff; + /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't + else + result_data[i] += (origin + res / scale_diff) * scale_diff; + } } } } @@ -419,8 +445,6 @@ private: } }; -} - REGISTER_FUNCTION(ToStartOfInterval) { factory.registerFunction(); From 36fa954f5000c8130f2556abba6052392dd16fa7 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:19:11 +0100 Subject: [PATCH 27/79] Stly check --- src/Functions/toStartOfInterval.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index f4171748b92..fb071c952ab 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -371,7 +371,7 @@ private: { result_data[i] = 0; if (scale_on_interval < scale_on_time) - /// if we have a time argument that has bigger scale than the interval can contain, we need + /// if we have a time argument that has bigger scale than the interval can contain, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_interval)) * scale_diff; else @@ -429,7 +429,7 @@ private: /// Case when Interval has some specific scale (3,6,9) else { - /// If we have a time argument that has bigger scale than the interval can contain, we need + /// If we have a time argument that has bigger scale than the interval can contain, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. 
if (scale_on_interval < scale_on_time) result_data[i] += origin + res * scale_diff; From 202ca21e3f1366479ec4f0d786adeab6e54cc3d4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 13 Dec 2023 15:16:51 +0100 Subject: [PATCH 28/79] fix tests --- src/Functions/toStartOfInterval.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index fb071c952ab..56e721b7601 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -370,12 +370,12 @@ private: for (size_t i = 0; i != size; ++i) { result_data[i] = 0; - if (scale_on_interval < scale_on_time) - /// if we have a time argument that has bigger scale than the interval can contain, we need + if (scale_on_interval < scale_on_time && scale_on_interval != 1) + /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. 
- result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_interval)) * scale_diff; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)) * scale_diff; else - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); } } else From 174309821a9813dba1d2a090768332e9de9470e3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Dec 2023 20:48:30 +0000 Subject: [PATCH 29/79] Small fixups --- src/Functions/toStartOfInterval.cpp | 181 +++++++++++++--------------- 1 file changed, 85 insertions(+), 96 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 56e721b7601..81a2fd0a75d 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes class FunctionToStartOfInterval : public IFunction { -public: +private: enum class Overload { Default, /// toStartOfInterval(time, interval) or toStartOfInterval(time, interval, timezone) @@ -39,6 +39,7 @@ public: }; mutable Overload overload; +public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } static constexpr auto name = "toStartOfInterval"; @@ -82,7 +83,9 @@ public: "Illegal type {} of 2nd argument of function {}, expected a time interval", type_arg2->getName(), getName()); - /// Result here is determined for default overload (without origin) + overload = Overload::Default; + + /// Determine result type for default overload (no origin) switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: @@ -110,21 +113,20 @@ public: const DataTypePtr & type_arg3 = arguments[2].type; if (isString(type_arg3)) { - overload = Overload::Default; - if (value_is_date && result_type == 
ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + "A timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); } - else if (isDateTimeOrDateTime64(type_arg3) || isDate(type_arg3)) + else if (isDate(type_arg3) || isDateTime(type_arg3) || isDateTime64(type_arg3)) { overload = Overload::Origin; - if (isDateTime64(arguments[0].type) && isDateTime64(arguments[2].type)) + const DataTypePtr & type_arg1 = arguments[0].type; + if (isDateTime64(type_arg1) && isDateTime64(type_arg3)) result_type = ResultType::DateTime64; - else if (isDateTime(arguments[0].type) && isDateTime(arguments[2].type)) + else if (isDateTime(type_arg1) && isDateTime(type_arg3)) result_type = ResultType::DateTime; - else if (isDate(arguments[0].type) && isDate(arguments[2].type)) + else if (isDate(type_arg1) && isDate(type_arg3)) result_type = ResultType::Date; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); @@ -149,7 +151,7 @@ public: type_arg4->getName(), getName()); if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + "A timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); }; @@ -190,14 +192,14 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDate32(arguments[0].type) || isDateTime64(arguments[0].type)) + if (isDateTime64(arguments[0].type)) scale = 
assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9 > scale ? 9 : scale; + scale = (9 > scale) ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6 > scale ? 6 : scale; + scale = (6 > scale) ? 6 : scale; else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3 > scale ? 3 : scale; + scale = (3 > scale) ? 3 : scale; const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); @@ -213,20 +215,19 @@ public: const auto & interval_column = arguments[1]; ColumnWithTypeAndName origin_column; - const bool has_origin_arg = (arguments.size() == 3 && isDateOrDate32OrDateTimeOrDateTime64(arguments[2].type)) || arguments.size() == 4; - if (has_origin_arg) + if (overload == Overload::Origin) origin_column = arguments[2]; - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; + const size_t time_zone_arg_num = (overload == Overload::Origin) ? 
3 : 2; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); - ColumnPtr result_column = nullptr; - if (isDateTime64(result_type)) - result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + ColumnPtr result_column; + if (isDate(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); else if (isDateTime(result_type)) result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); - else - result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else if (isDateTime64(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); return result_column; } @@ -238,44 +239,24 @@ private: const auto & time_column_type = *time_column.type.get(); const auto & time_column_col = *time_column.column.get(); - if (isDateTime64(time_column_type)) + if (isDate(time_column_type)) { - if (origin_column.column != nullptr && !isDateTime64(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - - const auto * time_column_vec = checkAndGetColumn(time_column_col); - auto scale = assert_cast(time_column_type).getScale(); - - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); - } - else if (isDateTime(time_column_type)) - { - if (origin_column.column != nullptr && !isDateTime(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - - const auto * time_column_vec = checkAndGetColumn(time_column_col); - if (time_column_vec) 
- return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); - } - else if (isDate(time_column_type)) - { - if (origin_column.column != nullptr && !isDate(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } - else if (isDate32(time_column_type)) + else if (isDateTime(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDate32(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + } + else if (isDateTime64(time_column_type)) + { + const auto * time_column_vec = checkAndGetColumn(time_column_col); + auto scale = assert_cast(time_column_type).getScale(); + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName()); } @@ -327,26 +308,23 @@ private: } template - Int64 decideScaleOnPrecision() const + static 
Int64 scaleFromInterval() { - static constexpr Int64 MILLISECOND_SCALE = 1000; - static constexpr Int64 MICROSECOND_SCALE = 1000000; - static constexpr Int64 NANOSECOND_SCALE = 1000000000; switch (unit) { case IntervalKind::Millisecond: - return MILLISECOND_SCALE; + return 1'000; case IntervalKind::Microsecond: - return MICROSECOND_SCALE; + return 1'000'000; case IntervalKind::Nanosecond: - return NANOSECOND_SCALE; + return 1'000'000'000; default: return 1; } } template - ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const { using ResultColumnType = typename ResultDataType::ColumnType; using ResultFieldType = typename ResultDataType::FieldType; @@ -359,23 +337,29 @@ private: auto & result_data = col_to->getData(); result_data.resize(size); - Int64 scale_on_time = DecimalUtils::scaleMultiplier(scale); // scale that depends on type of arguments - Int64 scale_on_interval = decideScaleOnPrecision(); // scale that depends on the Interval + const Int64 scale_time = DecimalUtils::scaleMultiplier(scale); + const Int64 scale_interval = scaleFromInterval(); + /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them /// to get the right precision for the result. - Int64 scale_diff = scale_on_interval > scale_on_time ? scale_on_interval / scale_on_time : scale_on_time / scale_on_interval; + const Int64 scale_diff = (scale_interval > scale_time) ? 
(scale_interval / scale_time) : (scale_time / scale_interval); if (origin_column.column == nullptr) { - for (size_t i = 0; i != size; ++i) + if (scale_time > scale_interval && scale_interval != 1) { - result_data[i] = 0; - if (scale_on_interval < scale_on_time && scale_on_interval != 1) + for (size_t i = 0; i != size; ++i) + { /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)) * scale_diff; - else - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); + result_data[i] = 0; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)) * scale_diff; + } + } + else + { + for (size_t i = 0; i != size; ++i) + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)); } } else @@ -386,55 +370,60 @@ private: { auto t = time_data[i]; if (origin > static_cast(t)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); + + /// The trick to calculate the interval starting from an offset is to + /// 1. subtract the offset, + /// 2. perform the calculation, and + /// 3. add the offset to the result. 
t -= origin; - auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_on_time)); + auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_time)); - static constexpr size_t SECONDS_PER_DAY = 86400; + static constexpr size_t SECONDS_PER_DAY = 86'400; result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) { - /// By default, when we use week, month, quarter or year interval, we get date return type. So, simply add values. - if (isDate(result_type) || isDate32(result_type)) + /// For such intervals, ToStartOfInterval::execute() returns days + if (isDate(result_type)) result_data[i] += origin + res; - /// When we use DateTime arguments, we should keep in mind that we also have hours, minutes and seconds there, - /// so we need to multiply result by amount of seconds per day. else if (isDateTime(result_type)) result_data[i] += origin + res * SECONDS_PER_DAY; - /// When we use DateTime64 arguments, we also should multiply it on right scale. - else - result_data[i] += origin + (res * SECONDS_PER_DAY * scale_on_time); + else if (isDateTime64(result_type)) + result_data[i] += origin + (res * SECONDS_PER_DAY * scale_time); } else { - /// In this case result will be calculated as datetime, so we need to get the amount of days if the arguments are Date. - if (isDate(result_type) || isDate32(result_type)) + /// ToStartOfInterval::execute() returns seconds + + if (isDate(result_type)) res = res / SECONDS_PER_DAY; - /// Case when Interval has default scale - if (scale_on_interval == 1) + if (scale_interval == 1) { - /// Case when the arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. - if (scale_on_time % 1000 != 0 && scale_on_time >= 1000) - result_data[i] += (origin + res / scale_on_time) * scale_on_time; - /// Special case when the arguments are DateTime64 with precision 2. 
Here origin has right precision and res doesn't - else if (scale_on_time == 100) - result_data[i] += (origin + res * scale_on_time); - /// Cases when precision of DateTime64 is 1, 3, 6, 9 e.g. has right precision in res and origin. + /// Interval has default scale, i.e. Year - Second + + if (scale_time % 1000 != 0 && scale_time >= 1000) + /// The arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. + result_data[i] += (origin + res / scale_time) * scale_time; + else if (scale_time == 100) + /// The arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't + result_data[i] += (origin + res * scale_time); else + /// Precision of DateTime64 is 1, 3, 6, 9, e.g. has right precision in res and origin. result_data[i] += (origin + res); } - /// Case when Interval has some specific scale (3,6,9) else { - /// If we have a time argument that has bigger scale than the interval can contain, we need - /// to return a value with bigger precision and thus we should multiply result on the scale difference. - if (scale_on_interval < scale_on_time) + /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond + + if (scale_interval < scale_time) + /// If we have a time argument that has bigger scale than the interval can contain, we need + /// to return a value with bigger precision and thus we should multiply result on the scale difference. 
result_data[i] += origin + res * scale_diff; - /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't else + /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't result_data[i] += (origin + res / scale_diff) * scale_diff; } } From 861421d27ac74fe11921c4ba901dcccb94df76e8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jan 2024 16:03:22 +0000 Subject: [PATCH 30/79] fixes --- src/Functions/DateTimeTransforms.h | 57 ++++++--- src/Functions/toStartOfInterval.cpp | 117 ++++++++++++------ ...to_start_of_interval_with_origin.reference | 32 ++--- ...02916_to_start_of_interval_with_origin.sql | 8 +- 4 files changed, 139 insertions(+), 75 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 74b37e18907..dbe2b11d7b2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -487,7 +487,7 @@ struct ToStartOfInterval { throwDateTimeIsNotSupported(TO_START_OF_INTERVAL_NAME); } - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier, Int64 /*origin*/ = 0) { if (scale_multiplier < 1000000000) { @@ -522,7 +522,7 @@ struct ToStartOfInterval { throwDateTimeIsNotSupported(TO_START_OF_INTERVAL_NAME); } - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier, Int64 /*origin*/ = 0) { if (scale_multiplier < 1000000) { @@ -565,7 +565,7 @@ struct ToStartOfInterval { throwDateTimeIsNotSupported(TO_START_OF_INTERVAL_NAME); } - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 
scale_multiplier) + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier, Int64 /*origin*/ = 0) { if (scale_multiplier < 1000) { @@ -608,7 +608,7 @@ struct ToStartOfInterval { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } @@ -629,7 +629,7 @@ struct ToStartOfInterval { return time_zone.toStartOfMinuteInterval(t, minutes); } - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } @@ -650,7 +650,7 @@ struct ToStartOfInterval { return time_zone.toStartOfHourInterval(t, hours); } - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } @@ -671,7 +671,7 @@ struct ToStartOfInterval { return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); } - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } @@ -692,9 +692,12 @@ struct ToStartOfInterval { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, Int64 weeks, const 
DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); + if (origin == 0) + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); + else + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); } }; @@ -713,9 +716,24 @@ struct ToStartOfInterval { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + if (origin == 0) + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + else + { + Int64 days = time_zone.toDayOfMonth(t / scale_multiplier + origin) - time_zone.toDayOfMonth(origin); + Int64 months_to_add = time_zone.toMonth(t / scale_multiplier + origin) - time_zone.toMonth(origin); + Int64 years = time_zone.toYear(t / scale_multiplier + origin) - time_zone.toYear(origin); + months_to_add = days < 0 ? 
months_to_add - 1 : months_to_add; + months_to_add += years * 12; + Int64 month_multiplier = (months_to_add / months) * months; + Int64 a = 0; + + a = time_zone.addMonths(time_zone.toDate(origin), month_multiplier); + // a += time_zone.toTime(origin); + return a - time_zone.toDate(origin); + } } }; @@ -734,9 +752,12 @@ struct ToStartOfInterval { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); + if (origin == 0) + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); + else + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); } }; @@ -755,9 +776,15 @@ struct ToStartOfInterval { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); + if (origin == 0) + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); + else + { + auto a = ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); + return a; + } } }; diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 81a2fd0a75d..1ba7fed4bee 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,17 +1,17 @@ +#include +#include +#include #include #include #include #include -#include "DataTypes/IDataType.h" #include #include #include #include 
#include -#include #include #include -#include #include #include #include @@ -270,6 +270,27 @@ private: if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName()); + if (isDate(time_data_type) || isDateTime(time_data_type)) + { + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) + { + case IntervalKind::Nanosecond: + case IntervalKind::Microsecond: + case IntervalKind::Millisecond: + if (isDate(time_data_type) || isDateTime(time_data_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? "Date" : "DateTime"); + break; + case IntervalKind::Second: + case IntervalKind::Minute: + case IntervalKind::Hour: + if (isDate(time_data_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); + break; + default: + break; + } + } + const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a const time interval", getName()); @@ -337,94 +358,110 @@ private: auto & result_data = col_to->getData(); result_data.resize(size); - const Int64 scale_time = DecimalUtils::scaleMultiplier(scale); + const Int64 scale_endtime = DecimalUtils::scaleMultiplier(scale); const Int64 scale_interval = scaleFromInterval(); /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them /// to get the right precision for the result. - const Int64 scale_diff = (scale_interval > scale_time) ? (scale_interval / scale_time) : (scale_time / scale_interval); + const Int64 scale_diff = (scale_interval > scale_endtime) ? 
(scale_interval / scale_endtime) : (scale_endtime / scale_interval); if (origin_column.column == nullptr) { - if (scale_time > scale_interval && scale_interval != 1) + if (scale_endtime > scale_interval && scale_interval != 1) { for (size_t i = 0; i != size; ++i) { /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] = 0; - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)) * scale_diff; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)) * scale_diff; } } else { for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)); } } else { UInt64 origin = origin_column.column->get64(0); + Int64 origin_scale = 1; + if (isDateTime64(origin_column.type.get())) + origin_scale = assert_cast(*origin_column.type.get()).getScale(); for (size_t i = 0; i != size; ++i) { - auto t = time_data[i]; - if (origin > static_cast(t)) + UInt64 end_time = time_data[i]; + + if (origin > static_cast(end_time) && origin_scale == scale) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); + else if (origin_scale > scale) + origin /= static_cast(std::pow(10, origin_scale - scale)); /// If aguments have different scales, we make + else if (origin_scale < scale) /// origin argument to have the same scale as the first argument. + origin *= static_cast(std::pow(10, scale - origin_scale)); /// The trick to calculate the interval starting from an offset is to /// 1. subtract the offset, /// 2. perform the calculation, and /// 3. add the offset to the result. 
- t -= origin; - auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_time)); - static constexpr size_t SECONDS_PER_DAY = 86'400; - result_data[i] = 0; - if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + + if (isDate(origin_column.type.get())) /// We need to perform calculations on dateTime (dateTime64) values only. { - /// For such intervals, ToStartOfInterval::execute() returns days - if (isDate(result_type)) - result_data[i] += origin + res; - else if (isDateTime(result_type)) - result_data[i] += origin + res * SECONDS_PER_DAY; - else if (isDateTime64(result_type)) - result_data[i] += origin + (res * SECONDS_PER_DAY * scale_time); + end_time *= SECONDS_PER_DAY; + origin *= SECONDS_PER_DAY; + } + + Int64 delta = (end_time - origin) * (isDateTime64(origin_column.type.get()) ? 1 : scale_endtime); /// No need to multiply on scale endtime if we have dateTime64 argument. + Int64 offset = 0; + + { + auto origin_data = isDateTime64(result_type) ? origin / scale_endtime : origin; + offset = static_cast(ToStartOfInterval::execute(delta, num_units, time_zone, scale_endtime, origin_data)); + } + + + if (isDate(result_type)) /// The result should be a date and the calculations were as datetime. + result_data[i] += (origin + offset) / SECONDS_PER_DAY; + else if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + { + if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. + offset *= scale_endtime; + + result_data[i] += origin + offset; } else { - /// ToStartOfInterval::execute() returns seconds - - if (isDate(result_type)) - res = res / SECONDS_PER_DAY; + /// ToStartOfInterval::execute() returns seconds. if (scale_interval == 1) { - /// Interval has default scale, i.e. 
Year - Second + if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. + offset *= scale_endtime; - if (scale_time % 1000 != 0 && scale_time >= 1000) - /// The arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. - result_data[i] += (origin + res / scale_time) * scale_time; - else if (scale_time == 100) - /// The arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't - result_data[i] += (origin + res * scale_time); + /// Interval has default scale, i.e. Year - Second. + + if (scale_endtime % 1000 != 0 && scale_endtime >= 1000) + /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has right precision and origin doesn't. + result_data[i] += (origin + offset / scale_endtime) * scale_endtime; else - /// Precision of DateTime64 is 1, 3, 6, 9, e.g. has right precision in res and origin. - result_data[i] += (origin + res); + /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has right precision in offset and origin. + result_data[i] += (origin + offset); } else { - /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond + /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond. - if (scale_interval < scale_time) + if (scale_interval < scale_endtime) /// If we have a time argument that has bigger scale than the interval can contain, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. 
- result_data[i] += origin + res * scale_diff; + result_data[i] += origin + offset * scale_diff; else - /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't - result_data[i] += (origin + res / scale_diff) * scale_diff; + /// The other case: interval has bigger scale than the interval or they have the same scale, so offset has the right precision and origin doesn't. + result_data[i] += (origin + offset / scale_diff) * scale_diff; } } } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 870853bc371..969e2726902 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -1,40 +1,40 @@ -- Negative tests -Time and origin as Time +Time and origin as Date 2023-02-01 2023-08-01 -2023-10-09 -2023-10-05 2023-10-08 +2023-10-08 +2023-10-09 Time and origin as DateTime 2023-02-01 09:08:07 2023-08-01 09:08:07 -2023-10-09 09:08:07 -2023-10-05 09:08:07 +2023-10-08 09:08:07 +2023-10-08 09:08:07 2023-10-09 09:08:07 2023-10-09 10:10:07 -2023-10-09 10:11:11 +2023-10-09 10:11:07 2023-10-09 10:11:12 Time and origin as DateTime64(9) 2023-02-01 09:08:07.123456789 2023-08-01 09:08:07.123456789 2023-09-10 09:08:07.123456789 -2023-10-05 09:08:07.123456789 -2023-10-08 09:08:07.123543189 -2023-10-09 09:10:07.123460389 -2023-10-09 10:10:11.123456849 -2023-10-09 10:11:10.123456791 +2023-10-08 09:08:07.123456789 +2023-10-09 09:08:07.123456789 +2023-10-09 10:10:07.123456789 +2023-10-09 10:11:11.123456789 +2023-10-09 10:11:12.123456789 2023-10-09 10:11:12.987456789 2023-10-09 10:11:12.987653789 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) 2023-02-01 09:08:07.123 2023-08-01 09:08:07.123 +2023-10-08 09:08:07.123 +2023-10-08 09:08:07.123 2023-10-09 09:08:07.123 
-2023-10-05 09:08:07.123 -2023-10-08 09:09:33.523 -2023-10-09 09:10:10.723 -2023-10-09 10:10:11.183 -2023-10-09 10:11:10.125 +2023-10-09 10:10:07.123 +2023-10-09 10:11:11.123 +2023-10-09 10:11:12.123 2023-10-09 10:11:12.987 2023-10-09 10:11:12.987000 2023-10-09 10:11:12.987000000 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 71f5fb7fb36..4f8a96b093d 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -23,7 +23,7 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5 -- too many arguments SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT 'Time and origin as Time'; +SELECT 'Time and origin as Date'; SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); @@ -43,8 +43,8 @@ SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalDay(1), toDateTime('2023-10-08 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalHour(1), toDateTime('2023-10-09 09:10:07')); -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 10:10:11')); -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalSecond(1), toDateTime('2023-10-09 10:11:10')); +SELECT 
toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 09:10:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalSecond(1), toDateTime('2023-10-09 09:10:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMillisecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMicrosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalNanosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -56,7 +56,7 @@ SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toInt SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123456789', 9)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 09:10:11.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 
9)); From cb645f82198250b119d734291456afe8dcdde27f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jan 2024 17:18:56 +0100 Subject: [PATCH 31/79] fix style --- src/Functions/toStartOfInterval.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 1ba7fed4bee..ffabf38ef20 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -397,7 +397,7 @@ private: if (origin > static_cast(end_time) && origin_scale == scale) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); else if (origin_scale > scale) - origin /= static_cast(std::pow(10, origin_scale - scale)); /// If aguments have different scales, we make + origin /= static_cast(std::pow(10, origin_scale - scale)); /// If arguments have different scales, we make else if (origin_scale < scale) /// origin argument to have the same scale as the first argument. origin *= static_cast(std::pow(10, scale - origin_scale)); @@ -439,16 +439,16 @@ private: if (scale_interval == 1) { - if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. + if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the correct scale. offset *= scale_endtime; /// Interval has default scale, i.e. Year - Second. if (scale_endtime % 1000 != 0 && scale_endtime >= 1000) - /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has right precision and origin doesn't. + /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has correct precision and origin doesn't. result_data[i] += (origin + offset / scale_endtime) * scale_endtime; else - /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has right precision in offset and origin. 
+ /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has correct precision in offset and origin. result_data[i] += (origin + offset); } else From 1117284be7f72a22ac841af8f1a91ec853adc900 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jan 2024 23:43:58 +0000 Subject: [PATCH 32/79] fix overflow --- src/Functions/DateTimeTransforms.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index dbe2b11d7b2..e9cee9616fb 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -697,7 +697,12 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); else - return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); + { + if (const auto weeks_to_days = weeks * 7; weeks_to_days / 7 == weeks) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, weeks_to_days, time_zone, scale_multiplier, origin); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); + } } }; @@ -728,11 +733,8 @@ struct ToStartOfInterval months_to_add = days < 0 ? 
months_to_add - 1 : months_to_add; months_to_add += years * 12; Int64 month_multiplier = (months_to_add / months) * months; - Int64 a = 0; - a = time_zone.addMonths(time_zone.toDate(origin), month_multiplier); - // a += time_zone.toTime(origin); - return a - time_zone.toDate(origin); + return time_zone.addMonths(time_zone.toDate(origin), month_multiplier) - time_zone.toDate(origin); } } }; @@ -757,7 +759,12 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); else - return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); + { + if (const auto quarters_to_months = quarters * 3; quarters_to_months / 3 == quarters) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, quarters_to_months, time_zone, scale_multiplier, origin); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); + } } }; @@ -782,8 +789,10 @@ struct ToStartOfInterval return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); else { - auto a = ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); - return a; + if (const auto years_to_months = years * 12; years_to_months / 12 == years) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, years_to_months, time_zone, scale_multiplier, origin); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); } } }; From 07f031ec8a26d0ee7081d725fe59b312101bcae8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 8 Jan 2024 15:34:22 +0100 Subject: [PATCH 33/79] fix fuzzer --- src/Functions/DateTimeTransforms.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index e9cee9616fb..dd843daed8c 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -20,6 +20,7 @@ namespace DB { +static Int64 Int64_max_value = std::numeric_limits::max(); static constexpr auto microsecond_multiplier = 1000000; static constexpr auto millisecond_multiplier = 1000; @@ -698,8 +699,8 @@ struct ToStartOfInterval return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); else { - if (const auto weeks_to_days = weeks * 7; weeks_to_days / 7 == weeks) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, weeks_to_days, time_zone, scale_multiplier, origin); + if (weeks < Int64_max_value / 7) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); } @@ -760,8 +761,8 @@ struct ToStartOfInterval return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); else { - if (const auto quarters_to_months = quarters * 3; quarters_to_months / 3 == quarters) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, quarters_to_months, time_zone, scale_multiplier, origin); + if (quarters < Int64_max_value / 3) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); } @@ -789,8 +790,8 @@ struct ToStartOfInterval return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); else { - if (const auto years_to_months = years * 12; years_to_months / 12 == years) // Check if 
multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, years_to_months, time_zone, scale_multiplier, origin); + if (years < Int64_max_value / 12) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); } From 26561c6bdd22085d6fe8537fa37cbf12de573efd Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jan 2024 17:47:17 +0100 Subject: [PATCH 34/79] fix due to #58557 --- .../02916_to_start_of_interval_with_origin.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 969e2726902..552323be1a5 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -24,7 +24,7 @@ Time and origin as DateTime64(9) 2023-10-09 10:11:11.123456789 2023-10-09 10:11:12.123456789 2023-10-09 10:11:12.987456789 -2023-10-09 10:11:12.987653789 +2023-10-09 10:11:12.987654789 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) 2023-02-01 09:08:07.123 From 85a35dce28d3a367cf306c2e95edb41a3484a9c8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jan 2024 17:22:16 +0000 Subject: [PATCH 35/79] fix tests --- .../0_stateless/02207_subseconds_intervals.reference | 6 +++--- .../02956_fix_to_start_of_milli_microsecond.reference | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 
b0edbda5e76..6cde773c3c4 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,14 +10,14 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.12345600 -1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.12345700 +1980-12-12 12:12:12.12345700 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 1930-12-12 12:12:12.12345600 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.12345600 +2220-12-12 12:12:12.12345700 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index d3a002c4fd4..dff0c2a9585 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -1,4 +1,4 @@ -2023-10-09 10:11:12.001 -2023-10-09 10:11:12.001 -2023-10-09 10:11:12.000 -2023-10-09 10:11:12.000 +2023-10-09 10:11:12.001000 +2023-10-09 10:11:12.001000 +2023-10-09 10:11:12.000000 +2023-10-09 10:11:12.000000 From 9aa0fa11f843bf82432f93d497ec9a68dc756db6 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:01:53 +0200 Subject: [PATCH 36/79] IntervalKind -> IntervalKind::Kind --- src/Functions/DateTimeTransforms.h | 6 +-- src/Functions/toStartOfInterval.cpp | 70 ++++++++++++++--------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 8d70dbea685..20dc1bc21f2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -703,7 +703,7 @@ struct ToStartOfInterval else { if (weeks < Int64_max_value / 7) // Check if multiplication doesn't 
overflow Int64 value - return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); } @@ -765,7 +765,7 @@ struct ToStartOfInterval else { if (quarters < Int64_max_value / 3) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); } @@ -794,7 +794,7 @@ struct ToStartOfInterval else { if (years < Int64_max_value / 12) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); + return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); } diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index bdf947977b6..ab8dfef58ca 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -192,11 +192,11 @@ public: UInt32 scale = 0; if (isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); - if (interval_type->getKind() == IntervalKind::Nanosecond) + if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = (9 > scale) ? 9 : scale; - else if (interval_type->getKind() == IntervalKind::Microsecond) + else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) scale = (6 > scale) ? 
6 : scale; - else if (interval_type->getKind() == IntervalKind::Millisecond) + else if (interval_type->getKind() == IntervalKind::Kind::Millisecond) scale = (3 > scale) ? 3 : scale; const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; @@ -272,15 +272,15 @@ private: { switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case IntervalKind::Kind::Millisecond: if (isDate(time_data_type) || isDateTime(time_data_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? "Date" : "DateTime"); break; - case IntervalKind::Second: - case IntervalKind::Minute: - case IntervalKind::Hour: + case IntervalKind::Kind::Second: + case IntervalKind::Kind::Minute: + case IntervalKind::Kind::Hour: if (isDate(time_data_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); break; @@ -299,28 +299,28 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - case IntervalKind::Nanosecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Microsecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Millisecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Second: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Minute: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Hour: - return execute(time_data_type, 
time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Day: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Week: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Month: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Quarter: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Year: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Nanosecond: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Microsecond: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Millisecond: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Second: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Minute: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Hour: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Day: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Week: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Month: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case 
IntervalKind::Kind::Quarter: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Year: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); @@ -331,11 +331,11 @@ private: { switch (unit) { - case IntervalKind::Millisecond: + case IntervalKind::Kind::Millisecond: return 1'000; - case IntervalKind::Microsecond: + case IntervalKind::Kind::Microsecond: return 1'000'000; - case IntervalKind::Nanosecond: + case IntervalKind::Kind::Nanosecond: return 1'000'000'000; default: return 1; @@ -424,7 +424,7 @@ private: if (isDate(result_type)) /// The result should be a date and the calculations were as datetime. result_data[i] += (origin + offset) / SECONDS_PER_DAY; - else if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + else if (unit == IntervalKind::Kind::Week || unit == IntervalKind::Kind::Month || unit == IntervalKind::Kind::Quarter || unit == IntervalKind::Kind::Year) { if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. 
offset *= scale_endtime; From 6109da248f9a4a3d26e0ca8c41225129202e9688 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:29:13 +0200 Subject: [PATCH 37/79] fix test --- .../02956_fix_to_start_of_milli_microsecond.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index b005ce6dfb0..95a05a24981 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -2,6 +2,6 @@ 2023-10-09 10:11:12.001000 2023-10-09 10:11:12.000000 2023-10-09 10:11:12.000000 -2023-10-09 00:00:00.000000 -2023-10-09 00:00:00.000 +2023-10-09 00:00:00.000000000 +2023-10-09 00:00:00.000000000 2023-10-09 00:00:00 From 938c888b131d15b311c8368337d36a52ad2b0a02 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 11 Apr 2024 17:01:01 +0200 Subject: [PATCH 38/79] Reload CI to remove build error (empty commit) From a959663e977361af3896f010f9aeadaa8b64c323 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 23 May 2024 16:43:18 +0200 Subject: [PATCH 39/79] Update toStartOfInterval.cpp --- src/Functions/toStartOfInterval.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 749aa9f8800..a4f870613d3 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -241,14 +241,14 @@ private: if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return 
dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(&time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } From 1f26281493d66f15408d8459c198170e71901f68 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 27 May 2024 15:43:14 +0200 Subject: [PATCH 40/79] build fix --- src/Functions/toStartOfInterval.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index a4f870613d3..c45501aa905 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -241,20 +241,20 @@ private: if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(&time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime64(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) From 
9a7888ba516420c00978901bbe5b85cafe6e7bdf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 5 Jun 2024 16:39:54 +0200 Subject: [PATCH 41/79] fix tests --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index c45501aa905..b98d78171ae 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -376,7 +376,7 @@ private: /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] = 0; - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)) * scale_diff; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)); } } else From 63b0d13a62c84245a3efe6e3cd9cf08cd2a588a8 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 6 Jun 2024 16:15:35 +0000 Subject: [PATCH 42/79] trying to fix tests --- src/Functions/toStartOfInterval.cpp | 2 -- .../02207_subseconds_intervals.reference | 16 ++++++++-------- ...6_fix_to_start_of_milli_microsecond.reference | 12 ++++++------ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b98d78171ae..e358addf972 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -192,8 +192,6 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDateTime64(arguments[0].type)) - scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = (9 > scale) ? 
9 : scale; else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 6cde773c3c4..7e6d64b6b9f 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.12345700 -1980-12-12 12:12:12.12345700 +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123457 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.12345600 +1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.12345700 +2220-12-12 12:12:12.123457 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123000 -1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123000 +1930-12-12 12:12:12.123 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123000 +2220-12-12 12:12:12.123 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index 95a05a24981..413c79828c7 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -1,7 +1,7 @@ -2023-10-09 10:11:12.001000 -2023-10-09 10:11:12.001000 -2023-10-09 10:11:12.000000 -2023-10-09 10:11:12.000000 -2023-10-09 00:00:00.000000000 -2023-10-09 00:00:00.000000000 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.000 +2023-10-09 10:11:12.000 +2023-10-09 00:00:00.000000 +2023-10-09 
00:00:00.000 2023-10-09 00:00:00 From c5ba8eb477fb168d38dd1493b2b20a121a3ed1fd Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 6 Jun 2024 16:46:38 +0000 Subject: [PATCH 43/79] fix for origin --- src/Functions/toStartOfInterval.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e358addf972..cc5ffd56976 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -192,6 +192,8 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; + if (isDateTime64(arguments[0].type) && overload == Overload::Origin) + scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = (9 > scale) ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) From 4fad12ecb075dd1ba4faee65b7d2155085fa2e5f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 3 Sep 2024 19:53:28 +0200 Subject: [PATCH 44/79] rewrite the main logic --- src/Functions/DateTimeTransforms.h | 35 +-- src/Functions/toStartOfInterval.cpp | 199 ++++++------------ ...to_start_of_interval_with_origin.reference | 4 +- 3 files changed, 79 insertions(+), 159 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 50907e3e87e..9824608f067 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -22,7 +22,6 @@ namespace DB { -static Int64 Int64_max_value = std::numeric_limits::max(); static constexpr auto millisecond_multiplier = 1'000; static constexpr auto microsecond_multiplier = 1'000'000; static constexpr auto nanosecond_multiplier = 1'000'000'000; @@ -701,12 +700,8 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); else - { - if (weeks < Int64_max_value / 7) // Check if multiplication doesn't 
overflow Int64 value - return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); - } + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); + } }; @@ -727,18 +722,20 @@ struct ToStartOfInterval } static Int64 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { + const Int64 scaled_time = t / scale_multiplier; if (origin == 0) - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(scaled_time), months); else { - Int64 days = time_zone.toDayOfMonth(t / scale_multiplier + origin) - time_zone.toDayOfMonth(origin); - Int64 months_to_add = time_zone.toMonth(t / scale_multiplier + origin) - time_zone.toMonth(origin); - Int64 years = time_zone.toYear(t / scale_multiplier + origin) - time_zone.toYear(origin); + const Int64 scaled_origin = origin / scale_multiplier; + const Int64 days = time_zone.toDayOfMonth(scaled_time + scaled_origin) - time_zone.toDayOfMonth(scaled_origin); + Int64 months_to_add = time_zone.toMonth(scaled_time + scaled_origin) - time_zone.toMonth(scaled_origin); + const Int64 years = time_zone.toYear(scaled_time + scaled_origin) - time_zone.toYear(scaled_origin); months_to_add = days < 0 ? 
months_to_add - 1 : months_to_add; months_to_add += years * 12; Int64 month_multiplier = (months_to_add / months) * months; - return time_zone.addMonths(time_zone.toDate(origin), month_multiplier) - time_zone.toDate(origin); + return (time_zone.addMonths(time_zone.toDate(scaled_origin), month_multiplier) - time_zone.toDate(scaled_origin)); } } }; @@ -763,12 +760,7 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); else - { - if (quarters < Int64_max_value / 3) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); - } + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); } }; @@ -792,12 +784,7 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); else - { - if (years < Int64_max_value / 12) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); - } + return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin);z } }; diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index cc5ffd56976..b749d92eeb6 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,6 +1,3 @@ -#include -#include -#include #include #include #include @@ -13,6 +10,8 @@ #include #include #include +#include +#include namespace DB @@ -176,7 +175,7 @@ public: else { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} 
doesn't match: passed {}, should be 2, 3 or 4", + "Number of arguments for function {} doesn't match: passed {}, must be 2, 3 or 4", getName(), arguments.size()); } @@ -193,13 +192,17 @@ public: { UInt32 scale = 0; if (isDateTime64(arguments[0].type) && overload == Overload::Origin) + { scale = assert_cast(*arguments[0].type.get()).getScale(); + if (assert_cast(*arguments[2].type.get()).getScale() != scale) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same scale", getName()); + } if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) - scale = (9 > scale) ? 9 : scale; + scale = 9; else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) - scale = (6 > scale) ? 6 : scale; + scale = 6; else if (interval_type->getKind() == IntervalKind::Kind::Millisecond) - scale = (3 > scale) ? 3 : scale; + scale = 3; const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); @@ -218,7 +221,7 @@ public: if (overload == Overload::Origin) origin_column = arguments[2]; - const size_t time_zone_arg_num = (overload == Overload::Origin) ? 3 : 2; + const size_t time_zone_arg_num = (overload == Overload::Default) ? 
2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); ColumnPtr result_column; @@ -272,25 +275,22 @@ private: if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName()); - if (isDate(time_data_type) || isDateTime(time_data_type)) + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) - { - case IntervalKind::Kind::Nanosecond: - case IntervalKind::Kind::Microsecond: - case IntervalKind::Kind::Millisecond: - if (isDate(time_data_type) || isDateTime(time_data_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? "Date" : "DateTime"); - break; - case IntervalKind::Kind::Second: - case IntervalKind::Kind::Minute: - case IntervalKind::Kind::Hour: - if (isDate(time_data_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); - break; - default: - break; - } + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case IntervalKind::Kind::Millisecond: + if (isDate(time_data_type) || isDateTime(time_data_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? 
"Date" : "DateTime"); + break; + case IntervalKind::Kind::Second: + case IntervalKind::Kind::Minute: + case IntervalKind::Kind::Hour: + if (isDate(time_data_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); + break; + default: + break; } const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); @@ -330,27 +330,10 @@ private: std::unreachable(); } - template - static Int64 scaleFromInterval() - { - switch (unit) - { - case IntervalKind::Kind::Millisecond: - return 1'000; - case IntervalKind::Kind::Microsecond: - return 1'000'000; - case IntervalKind::Kind::Nanosecond: - return 1'000'000'000; - default: - return 1; - } - } - template ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const { using ResultColumnType = typename ResultDataType::ColumnType; - using ResultFieldType = typename ResultDataType::FieldType; const auto & time_data = time_column_type.getData(); size_t size = time_data.size(); @@ -360,114 +343,64 @@ private: auto & result_data = col_to->getData(); result_data.resize(size); - const Int64 scale_endtime = DecimalUtils::scaleMultiplier(scale); - const Int64 scale_interval = scaleFromInterval(); + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them - /// to get the right precision for the result. - const Int64 scale_diff = (scale_interval > scale_endtime) ? 
(scale_interval / scale_endtime) : (scale_endtime / scale_interval); + if (origin_column.column) // Overload: Origin + { + const bool is_small_interval = (unit == IntervalKind::Kind::Nanosecond || unit == IntervalKind::Kind::Microsecond || unit == IntervalKind::Kind::Millisecond); + const bool is_result_date = isDate(result_type); - if (origin_column.column == nullptr) - { - if (scale_endtime > scale_interval && scale_interval != 1) - { - for (size_t i = 0; i != size; ++i) - { - /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need - /// to return a value with bigger precision and thus we should multiply result on the scale difference. - result_data[i] = 0; - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)); - } - } - else - { - for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)); - } - } - else - { - UInt64 origin = origin_column.column->get64(0); + Int64 result_scale = scale_multiplier; Int64 origin_scale = 1; - if (isDateTime64(origin_column.type.get())) - origin_scale = assert_cast(*origin_column.type.get()).getScale(); + if (isDateTime64(result_type)) /// We have origin scale only in case if arguments are DateTime64. + origin_scale = assert_cast(*origin_column.type).getScaleMultiplier(); + else if (!is_small_interval) /// In case of large interval and arguments are not DateTime64, we should not have scale in result. + result_scale = 1; + + if (is_small_interval) + result_scale = assert_cast(*result_type).getScaleMultiplier(); + + /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them + /// to get the right precision for the result. In case of large intervals, we should not have scale difference. + Int64 scale_diff = is_small_interval ? 
std::max(result_scale / origin_scale, origin_scale / result_scale) : 1; + + static constexpr Int64 SECONDS_PER_DAY = 86'400; + + UInt64 origin = origin_column.column->get64(0); for (size_t i = 0; i != size; ++i) { - UInt64 end_time = time_data[i]; - - if (origin > static_cast(end_time) && origin_scale == scale) + UInt64 time_arg = time_data[i]; + if (origin > static_cast(time_arg)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); - else if (origin_scale > scale) - origin /= static_cast(std::pow(10, origin_scale - scale)); /// If arguments have different scales, we make - else if (origin_scale < scale) /// origin argument to have the same scale as the first argument. - origin *= static_cast(std::pow(10, scale - origin_scale)); - /// The trick to calculate the interval starting from an offset is to - /// 1. subtract the offset, - /// 2. perform the calculation, and - /// 3. add the offset to the result. - - static constexpr size_t SECONDS_PER_DAY = 86'400; - result_data[i] = 0; - - if (isDate(origin_column.type.get())) /// We need to perform calculations on dateTime (dateTime64) values only. + if (is_result_date) /// All internal calculations of ToStartOfInterval<...> expect arguments to be seconds or milli-, micro-, nanoseconds. { - end_time *= SECONDS_PER_DAY; + time_arg *= SECONDS_PER_DAY; origin *= SECONDS_PER_DAY; } - Int64 delta = (end_time - origin) * (isDateTime64(origin_column.type.get()) ? 1 : scale_endtime); /// No need to multiply on scale endtime if we have dateTime64 argument. - Int64 offset = 0; + Int64 offset = ToStartOfInterval::execute(time_arg - origin, num_units, time_zone, result_scale, origin); + /// In case if arguments are DateTime64 with large interval, we should apply scale on it. + offset *= (!is_small_interval) ? result_scale : 1; + + if (is_result_date) /// Convert back to date after calculations. { - auto origin_data = isDateTime64(result_type) ? 
origin / scale_endtime : origin; - offset = static_cast(ToStartOfInterval::execute(delta, num_units, time_zone, scale_endtime, origin_data)); + offset /= SECONDS_PER_DAY; + origin /= SECONDS_PER_DAY; } - - if (isDate(result_type)) /// The result should be a date and the calculations were as datetime. - result_data[i] += (origin + offset) / SECONDS_PER_DAY; - else if (unit == IntervalKind::Kind::Week || unit == IntervalKind::Kind::Month || unit == IntervalKind::Kind::Quarter || unit == IntervalKind::Kind::Year) - { - if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. - offset *= scale_endtime; - - result_data[i] += origin + offset; - } - else - { - /// ToStartOfInterval::execute() returns seconds. - - if (scale_interval == 1) - { - if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the correct scale. - offset *= scale_endtime; - - /// Interval has default scale, i.e. Year - Second. - - if (scale_endtime % 1000 != 0 && scale_endtime >= 1000) - /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has correct precision and origin doesn't. - result_data[i] += (origin + offset / scale_endtime) * scale_endtime; - else - /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has correct precision in offset and origin. - result_data[i] += (origin + offset); - } - else - { - /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond. - - if (scale_interval < scale_endtime) - /// If we have a time argument that has bigger scale than the interval can contain, we need - /// to return a value with bigger precision and thus we should multiply result on the scale difference. - result_data[i] += origin + offset * scale_diff; - else - /// The other case: interval has bigger scale than the interval or they have the same scale, so offset has the right precision and origin doesn't. 
- result_data[i] += (origin + offset / scale_diff) * scale_diff; - } - } + result_data[i] = 0; + result_data[i] += (result_scale < origin_scale) ? (origin + offset) / scale_diff : (origin + offset) * scale_diff; } } + else // Overload: Default + { + for (size_t i = 0; i != size; ++i) + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + } + return result_col; } }; diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 552323be1a5..5e72df17027 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -23,8 +23,8 @@ Time and origin as DateTime64(9) 2023-10-09 10:10:07.123456789 2023-10-09 10:11:11.123456789 2023-10-09 10:11:12.123456789 -2023-10-09 10:11:12.987456789 -2023-10-09 10:11:12.987654789 +2023-10-09 10:11:12.987 +2023-10-09 10:11:12.987654 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) 2023-02-01 09:08:07.123 From efdef8c186eb81ed90ef0efdaa70323c4f42e98f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:18:55 +0200 Subject: [PATCH 45/79] fix build --- src/Functions/DateTimeTransforms.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 043213dcb64..d6d533f16ed 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -786,7 +786,7 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); else - return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin);z + return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); } }; From 
54bdd392ee5dc8ca1541a52f1d61b442f3c164aa Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 11:50:09 +0200 Subject: [PATCH 46/79] fix build --- src/Functions/toStartOfInterval.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 35e9a93acef..3a0df85dcf0 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -215,7 +215,7 @@ public: std::unreachable(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; @@ -252,6 +252,12 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } + else if (isDate32(time_column_type)) + { + const auto * time_column_vec = checkAndGetColumn(&time_column_col); + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + } else if (isDateTime(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); @@ -266,12 +272,6 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } - else if (isDate32(time_column_type)) - { - const auto * time_column_vec = checkAndGetColumn(&time_column_col); - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, 
input_rows_count); - } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64", getName()); } @@ -345,11 +345,12 @@ private: using ResultColumnType = typename ResultDataType::ColumnType; const auto & time_data = time_column_type.getData(); + size_t size = time_data.size(); auto result_col = result_type->createColumn(); auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); - result_data.resize(input_rows_count); + result_data.resize(size); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); From e16c85d94fd2f0143f6e31d456ccc07abfa8c816 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:32:57 +0200 Subject: [PATCH 47/79] fix suggestions --- src/Functions/toStartOfInterval.cpp | 12 ++++++------ ...02916_to_start_of_interval_with_origin.reference | 6 ++++++ .../02916_to_start_of_interval_with_origin.sql | 13 +++++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 3a0df85dcf0..3301974da7f 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -18,11 +18,11 @@ namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -275,7 +275,7 @@ private: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64", getName()); } - template + template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const 
TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const @@ -339,8 +339,8 @@ private: std::unreachable(); } - template - ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const + template + ColumnPtr execute(const TimeDataType &, const TimeColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const { using ResultColumnType = typename ResultDataType::ColumnType; diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 5e72df17027..f0afdf03963 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -5,6 +5,12 @@ Time and origin as Date 2023-10-08 2023-10-08 2023-10-09 +Time and origin as Date32 +2023-02-01 +2023-08-01 +2023-10-08 +2023-10-08 +2023-10-09 Time and origin as DateTime 2023-02-01 09:08:07 2023-08-01 09:08:07 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 4f8a96b093d..b03ccae31d9 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -36,6 +36,19 @@ SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMillisecond(1), toDate( SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMicrosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 
toStartOfInterval(toDate('2023-10-09'), toIntervalNanosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT 'Time and origin as Date32'; +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalYear(1), toDate32('2022-02-01')); +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalQuarter(1), toDate32('2022-02-01')); +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalMonth(1), toDate32('2023-09-08')); +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalWeek(1), toDate32('2023-10-01')); +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalDay(1), toDate32('2023-10-08')); +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalHour(1), toDate32('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalMinute(1), toDate32('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalSecond(1), toDate32('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalMillisecond(1), toDate32('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalMicrosecond(1), toDate32('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-10-09'), toIntervalNanosecond(1), toDate32('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + SELECT 'Time and origin as DateTime'; SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); From 945c93032207a7bf7e5b153fb759342c8e172308 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:53:19 +0200 Subject: [PATCH 48/79] fix build --- 
src/Functions/toStartOfInterval.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 3301974da7f..dc629160aff 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -313,27 +313,27 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Kind::Nanosecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Microsecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Millisecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Second: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Minute: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Hour: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Day: - return execute(time_data_type, time_column, num_units, origin_column, 
result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Week: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Month: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Quarter: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Year: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); From f497fde2ca8a4fba7e9fb94c1ac11d5db5e11c87 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 15:08:12 +0200 Subject: [PATCH 49/79] fix templates --- src/Functions/toStartOfInterval.cpp | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index dc629160aff..b2b0c2dbfed 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -250,19 +250,19 @@ private: const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), 
*time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime64(time_column_type)) { @@ -270,7 +270,7 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, Date32, DateTime or DateTime64", getName()); } @@ -313,27 +313,27 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Kind::Nanosecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Microsecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, 
time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Millisecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Second: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Minute: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Hour: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Day: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Week: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Month: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Quarter: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, 
num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Kind::Year: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); From 6acb5ab5a18a453d45231d51f14cc2e51d11b207 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 15:52:23 +0200 Subject: [PATCH 50/79] add Date32 support --- src/Functions/toStartOfInterval.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b2b0c2dbfed..77c72e5b5f2 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include @@ -118,16 +117,18 @@ public: "A timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); } - else if (isDate(type_arg3) || isDateTime(type_arg3) || isDateTime64(type_arg3)) + else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) { overload = Overload::Origin; const DataTypePtr & type_arg1 = arguments[0].type; - if (isDateTime64(type_arg1) && isDateTime64(type_arg3)) - result_type = ResultType::DateTime64; + if (isDate(type_arg1) && isDate(type_arg3)) + result_type = ResultType::Date; + else if (isDate32(type_arg1) && isDate32(type_arg3)) + result_type = ResultType::Date32; else if (isDateTime(type_arg1) && isDateTime(type_arg3)) result_type = ResultType::DateTime; - else if (isDate(type_arg1) && isDate(type_arg3)) - result_type = ResultType::Date; + else if (isDateTime64(type_arg1) && isDateTime64(type_arg3)) + result_type = ResultType::DateTime64; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and 
origin argument for function {} must have the same type", getName()); } @@ -230,6 +231,8 @@ public: ColumnPtr result_column; if (isDate(result_type)) result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else if (isDate32(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); else if (isDateTime(result_type)) result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); else if (isDateTime64(result_type)) @@ -357,7 +360,7 @@ private: if (origin_column.column) // Overload: Origin { const bool is_small_interval = (unit == IntervalKind::Kind::Nanosecond || unit == IntervalKind::Kind::Microsecond || unit == IntervalKind::Kind::Millisecond); - const bool is_result_date = isDate(result_type); + const bool is_result_date = isDateOrDate32(result_type); Int64 result_scale = scale_multiplier; Int64 origin_scale = 1; From 018a62777be68cbabce86e3ee9ee74692f6caf3f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 4 Sep 2024 16:56:07 +0200 Subject: [PATCH 51/79] small fixes for Date32 --- src/Functions/toStartOfInterval.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 77c72e5b5f2..6573fef6634 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -292,13 +292,13 @@ private: case IntervalKind::Kind::Nanosecond: case IntervalKind::Kind::Microsecond: case IntervalKind::Kind::Millisecond: - if (isDate(time_data_type) || isDateTime(time_data_type)) + if (isDateOrDate32(time_data_type) || isDateTime(time_data_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? 
"Date" : "DateTime"); break; case IntervalKind::Kind::Second: case IntervalKind::Kind::Minute: case IntervalKind::Kind::Hour: - if (isDate(time_data_type)) + if (isDateOrDate32(time_data_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); break; default: From 06507190d414b2537d3759128fdf40332d0fe1b8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 6 Sep 2024 14:29:26 +0200 Subject: [PATCH 52/79] Speed up some Kafka tests with multiprocessing --- tests/integration/test_storage_kafka/test.py | 196 +++++++++++-------- 1 file changed, 116 insertions(+), 80 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index bef90e1b9d3..6802a0b7785 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -10,6 +10,8 @@ import string import ast import math +from multiprocessing.dummy import Pool + import avro.schema import avro.io import avro.datafile @@ -4831,27 +4833,13 @@ def test_max_rows_per_message(kafka_cluster, create_query_generator): def test_row_based_formats(kafka_cluster, create_query_generator): admin_client = get_admin_client(kafka_cluster) - for format_name in [ - "TSV", - "TSVWithNamesAndTypes", - "TSKV", - "CSV", - "CSVWithNamesAndTypes", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "Avro", - "RowBinary", - "RowBinaryWithNamesAndTypes", - "MsgPack", - ]: + p = Pool(10) + + def run_for_format_name(format_name): logging.debug("Checking {format_name}") topic_name = format_name + get_topic_postfix(create_query_generator) + view_name = f"kafka_view_{format_name}" table_name = f"kafka_{format_name}" with kafka_topic(admin_client, topic_name): @@ -4870,12 +4858,12 @@ def test_row_based_formats(kafka_cluster, create_query_generator): instance.query( f""" - DROP 
TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{view_name}; DROP TABLE IF EXISTS test.{table_name}; {create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + CREATE MATERIALIZED VIEW test.{view_name} ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); @@ -4886,18 +4874,44 @@ def test_row_based_formats(kafka_cluster, create_query_generator): kafka_cluster, topic_name, message_count, need_decode=False ) - assert len(messages) == message_count + assert ( + len(messages) == message_count + ), f"Invalid message count for {format_name}" instance.query_with_retry( - "SELECT count() FROM test.view", + f"SELECT count() FROM test.{view_name}", check_callback=lambda res: int(res) == num_rows, ) - result = instance.query("SELECT * FROM test.view") + result = instance.query(f"SELECT * FROM test.{view_name}") expected = "" for i in range(num_rows): expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected + assert result == expected, f"Invalid result for {format_name}" + + results = [] + for format_name in [ + "TSV", + "TSVWithNamesAndTypes", + "TSKV", + "CSV", + "CSVWithNamesAndTypes", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "RowBinaryWithNamesAndTypes", + "MsgPack", + ]: + results.append(p.apply_async(run_for_format_name, args=(format_name,))) + + for result in results: + result.get() @pytest.mark.parametrize( @@ -4955,16 +4969,12 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): num_rows = 100 message_count = 9 - for format_name in [ - "JSONColumns", - "Native", - "Arrow", - "Parquet", - "ORC", - "JSONCompactColumns", - ]: + p = Pool(10) + + def run_for_format_name(format_name): topic_name = 
format_name + get_topic_postfix(create_query_generator) table_name = f"kafka_{format_name}" + view_name = f"kafka_view_{format_name}" logging.debug(f"Checking format {format_name}") with kafka_topic(admin_client, topic_name): create_query = create_query_generator( @@ -4977,12 +4987,12 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): instance.query( f""" - DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{view_name}; DROP TABLE IF EXISTS test.{table_name}; {create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + CREATE MATERIALIZED VIEW test.{view_name} ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; @@ -4991,22 +5001,38 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): messages = kafka_consume_with_retry( kafka_cluster, topic_name, message_count, need_decode=False ) - assert len(messages) == message_count + assert ( + len(messages) == message_count + ), f"Invalid message count for {format_name}" rows = int( instance.query_with_retry( - "SELECT count() FROM test.view", + f"SELECT count() FROM test.{view_name}", check_callback=lambda res: int(res) == num_rows, ) ) - assert rows == num_rows + assert rows == num_rows, f"Invalid row count for {format_name}" - result = instance.query("SELECT * FROM test.view ORDER by key") + result = instance.query(f"SELECT * FROM test.{view_name} ORDER by key") expected = "" for i in range(num_rows): expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected + assert result == expected, f"Invalid result for {format_name}" + + results = [] + for format_name in [ + "JSONColumns", + "Native", + "Arrow", + "Parquet", + "ORC", + "JSONCompactColumns", + ]: + results.append(p.apply_async(run_for_format_name, 
args=(format_name,))) + + for result in results: + result.get() def test_system_kafka_consumers(kafka_cluster): @@ -5300,6 +5326,54 @@ def test_formats_errors(kafka_cluster): bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) + p = Pool(10) + + def run_for_format_name(format_name): + with kafka_topic(admin_client, format_name): + table_name = f"kafka_{format_name}" + view_name = f"kafka_view_{format_name}" + + instance.query( + f""" + DROP TABLE IF EXISTS test.{view_name}; + DROP TABLE IF EXISTS test.{table_name}; + + CREATE TABLE test.{table_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_max_rows_per_message = 5, + format_template_row='template_row.format', + format_regexp='id: (.+?)', + input_format_with_names_use_header=0, + format_schema='key_value_message:Message'; + + CREATE MATERIALIZED VIEW test.{view_name} ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + """ + ) + + kafka_produce( + kafka_cluster, + format_name, + ["Broken message\nBroken message\nBroken message\n"], + ) + + num_errors = int( + instance.query_with_retry( + f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", + check_callback=lambda res: int(res) > 0, + ) + ) + + assert num_errors > 0, f"No errors for {format_name}" + + instance.query(f"DROP TABLE test.{table_name}") + instance.query(f"DROP TABLE test.{view_name}") + + results = [] for format_name in [ "Template", "Regexp", @@ -5342,48 +5416,10 @@ def test_formats_errors(kafka_cluster): "HiveText", "MySQLDump", ]: - with kafka_topic(admin_client, format_name): - table_name = f"kafka_{format_name}" + results.append(p.apply_async(run_for_format_name, args=(format_name,))) - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS 
test.{table_name}; - - CREATE TABLE test.{table_name} (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}', - kafka_max_rows_per_message = 5, - format_template_row='template_row.format', - format_regexp='id: (.+?)', - input_format_with_names_use_header=0, - format_schema='key_value_message:Message'; - - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; - """ - ) - - kafka_produce( - kafka_cluster, - format_name, - ["Broken message\nBroken message\nBroken message\n"], - ) - - num_errors = int( - instance.query_with_retry( - f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", - check_callback=lambda res: int(res) > 0, - ) - ) - - assert num_errors > 0 - - instance.query(f"DROP TABLE test.{table_name}") - instance.query("DROP TABLE test.view") + for result in results: + result.get() @pytest.mark.parametrize( From 84a5c88b50f197dc4a6f3bf9e06d245959011805 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 4 Sep 2024 12:45:13 +0000 Subject: [PATCH 53/79] Minor follow-up to #66933 --- .../functions/string-replace-functions.md | 41 ++++++++++++------- src/Functions/overlay.cpp | 16 ++++---- tests/queries/0_stateless/03205_overlay.sql | 4 +- 3 files changed, 38 insertions(+), 23 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0cc6b0b27d5..283d41d8b73 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -20,10 +20,10 @@ overlay(s, replace, offset[, length]) **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `s`: A string type [String](../data-types/string.md). 
- `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string. -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within input to be replaced. If `length` is not specified, the number of bytes removed from `input` equals the length of `replace`; otherwise `length` bytes are removed. +- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the string `s`. +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within the input string `s` to be replaced. If `length` is not specified, the number of bytes removed from `s` equals the length of `replace`; otherwise `length` bytes are removed. **Returned value** @@ -32,22 +32,35 @@ overlay(s, replace, offset[, length]) **Example** ```sql -SELECT overlay('ClickHouse SQL', 'CORE', 12) AS res; +SELECT overlay('My father is from Mexico.', 'mother', 4) AS res; ``` Result: ```text -┌─res─────────────┐ -│ ClickHouse CORE │ -└─────────────────┘ +┌─res──────────────────────┐ +│ My mother is from Mexico.│ +└──────────────────────────┘ +``` + +```sql +SELECT overlay('My father is from Mexico.', 'dad', 4, 6) AS res; +``` + +Result: + +```text +┌─res───────────────────┐ +│ My dad is from Mexico.│ +└───────────────────────┘ ``` ## overlayUTF8 Replace part of the string `input` with another string `replace`, starting at the 1-based index `offset`. -Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Assumes that the string contains valid UTF-8 encoded text. +If this assumption is violated, no exception is thrown and the result is undefined. 
**Syntax** @@ -59,8 +72,8 @@ overlayUTF8(s, replace, offset[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string. -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within input to be replaced. If `length` is not specified, the number of characters removed from `input` equals the length of `replace`; otherwise `length` characters are removed. +- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the input string `s`. +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within the input string `s` to be replaced. If `length` is not specified, the number of characters removed from `s` equals the length of `replace`; otherwise `length` characters are removed. **Returned value** @@ -69,15 +82,15 @@ overlayUTF8(s, replace, offset[, length]) **Example** ```sql -SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; +SELECT overlayUTF8('Mein Vater ist aus Österreich.', 'der Türkei', 20) AS res; ``` Result: ```text -┌─res────────────────────────┐ -│ ClickHouse是开源OLAP数据库 │ -└────────────────────────────┘ +┌─res───────────────────────────┐ +│ Mein Vater ist aus der Türkei.│ +└───────────────────────────────┘ ``` ## replaceOne diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index df8b825eabe..eddb7590cab 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -1,12 +1,12 @@ #include #include +#include +#include #include #include #include #include #include -#include -#include namespace DB { @@ -16,8 +16,8 @@ namespace /// If 'is_utf8' - measure offset and length in code points instead of bytes.
/// Syntax: -/// - overlay(input, replace, offset[, length]) -/// - overlayUTF8(input, replace, offset[, length]) - measure offset and length in code points instead of bytes +/// - overlay(s, replace, offset[, length]) +/// - overlayUTF8(s, replace, offset[, length]) - measure offset and length in code points instead of bytes template class FunctionOverlay : public IFunction { @@ -34,7 +34,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args{ - {"input", static_cast(&isString), nullptr, "String"}, + {"s", static_cast(&isString), nullptr, "String"}, {"replace", static_cast(&isString), nullptr, "String"}, {"offset", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, }; @@ -100,7 +100,6 @@ public: res_data.reserve(col_input_string->getChars().size()); } - #define OVERLAY_EXECUTE_CASE(HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ constantConstant( \ @@ -186,7 +185,6 @@ public: return res_col; } - private: /// input offset is 1-based, maybe negative /// output result is 0-based valid offset, within [0, input_size] @@ -229,6 +227,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { + /// Free us from handling negative length in the code below if (has_four_args && length_is_const && const_length < 0) { constantConstant( @@ -343,6 +342,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { + /// Free us from handling negative length in the code below if (has_four_args && length_is_const && const_length < 0) { vectorConstant( @@ -461,6 +461,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { + /// Free us from handling negative length in the code below if (has_four_args && length_is_const && const_length < 0) { constantVector( @@ -577,6 +578,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & 
res_offsets) const { + /// Free us from handling negative length in the code below if (has_four_args && length_is_const && const_length < 0) { vectorVector( diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index 765b29f93ec..b692cc0c5ab 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -35,10 +35,10 @@ SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0) SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); SELECT 'Test with special offset values'; -WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '之', offset) FROM numbers(26); +WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '之', offset) FROM numbers(26) ORDER BY number; SELECT 'Test with special length values'; -WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8); +WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8) ORDER BY number; SELECT 'Test with special input and replace values'; SELECT overlay('', '_', 6), overlayUTF8('', '_', 6); From 86f11e221e2b2880d56f330c9a5aa0c2ab129727 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 6 Sep 2024 17:07:21 +0200 Subject: [PATCH 54/79] Speed up more tests --- tests/integration/test_storage_kafka/test.py | 45 ++++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 6802a0b7785..440b7c526b9 100644 --- a/tests/integration/test_storage_kafka/test.py +++ 
b/tests/integration/test_storage_kafka/test.py @@ -1000,7 +1000,11 @@ def test_kafka_formats(kafka_cluster, create_query_generator): } topic_postfix = str(hash(create_query_generator)) - for format_name, format_opts in list(all_formats.items()): + + p = Pool(10) + results = [] + + def run_for_format(format_name, format_opts): logging.debug(f"Set up {format_name}") topic_name = f"format_tests_{format_name}-{topic_postfix}" data_sample = format_opts["data_sample"] @@ -1036,6 +1040,13 @@ def test_kafka_formats(kafka_cluster, create_query_generator): ), ) ) + + for format_name, format_opts in list(all_formats.items()): + results.append(p.apply_async(run_for_format, args=(format_name, format_opts))) + + for result in results: + result.get() + raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -1066,7 +1077,9 @@ def test_kafka_formats(kafka_cluster, create_query_generator): ) assert result_checker(res) - for format_name, format_opts in list(all_formats.items()): + results = [] + + def run_for_format2(format_name, format_opts): logging.debug(("Checking {}".format(format_name))) topic_name = f"format_tests_{format_name}-{topic_postfix}" # shift offsets by 1 if format supports empty value @@ -1090,6 +1103,12 @@ def test_kafka_formats(kafka_cluster, create_query_generator): ) kafka_delete_topic(get_admin_client(kafka_cluster), topic_name) + for format_name, format_opts in list(all_formats.items()): + results.append(p.apply_async(run_for_format2, args=(format_name, format_opts))) + + for result in results: + result.get() + # Since everything is async and shaky when receiving messages from Kafka, # we may want to try and check results multiple times in a loop. 
@@ -4239,7 +4258,11 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator topic_name_prefix = "format_tests_4_stream_" topic_name_postfix = get_topic_postfix(create_query_generator) - for format_name, format_opts in list(all_formats.items()): + + p = Pool(10) + results = [] + + def run_for_format(format_name, format_opts): logging.debug(f"Set up {format_name}") topic_name = f"{topic_name_prefix}{format_name}{topic_name_postfix}" data_sample = format_opts["data_sample"] @@ -4280,6 +4303,12 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator """ ) + for format_name, format_opts in list(all_formats.items()): + results.append(p.apply_async(run_for_format, args=(format_name, format_opts))) + + for result in results: + result.get() + raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -4310,7 +4339,9 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator ) assert result_checker(res) - for format_name, format_opts in list(all_formats.items()): + results = [] + + def run_for_format2(format_name, format_opts): logging.debug(f"Checking {format_name}") topic_name = f"{topic_name_prefix}{format_name}{topic_name_postfix}" # shift offsets by 1 if format supports empty value @@ -4350,6 +4381,12 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator ), "Proper error for format: {}".format(format_name) kafka_delete_topic(admin_client, topic_name) + for format_name, format_opts in list(all_formats.items()): + results.append(p.apply_async(run_for_format2, args=(format_name, format_opts))) + + for result in results: + result.get() + @pytest.mark.parametrize( "create_query_generator", From b430e27a5090e05e261d6a7961353e44972afa91 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 15:33:53 +0000 Subject: [PATCH 55/79] Add CITATION.cff --- CITATION.cff | 31 +++++++++++++++++++++++++++++++ 1 file changed, 
31 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000000..e4008c6940b --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,31 @@ +# This CITATION.cff file was generated with cffinit. + +cff-version: 1.2.0 +title: "ClickHouse" +message: "If you use this software, please cite it as below." +type: software +authors: + - family-names: "Milovidov" + given-names: "Alexey" +repository-code: 'https://github.com/ClickHouse/ClickHouse' +url: 'https://clickhouse.com' +license: Apache-2.0 +preferred-citation: + type: article + authors: + - family-names: "Schulze" + given-names: "Robert" + - family-names: "Schreiber" + given-names: "Tom" + - family-names: "Yatsishin" + given-names: "Ilya" + - family-names: "Dahimene" + given-names: "Ryadh" + - family-names: "Milovidov" + given-names: "Alexey" + journal: "Proceedings of the VLDB Endowment" + title: "ClickHouse - Lightning Fast Analytics for Everyone" + year: 2024 + volume: 17 + issue: 12 + doi: 10.14778/3685800.3685802 From 11fae9f321104e70746b21d1c12d35fff719db5f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 16:37:42 +0000 Subject: [PATCH 56/79] Bump libarchive to v3.7.1 --- contrib/libarchive | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libarchive b/contrib/libarchive index ee457961713..0c21691b177 160000 --- a/contrib/libarchive +++ b/contrib/libarchive @@ -1 +1 @@ -Subproject commit ee45796171324519f0c0bfd012018dd099296336 +Subproject commit 0c21691b177fac5f4cceca2a1ff2ddfa5d60f51c From 86842766e430ad080c58ca1eb4265dc2e6816f0e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 16:59:14 +0000 Subject: [PATCH 57/79] Bump OpenSSL to 3.2.2 --- contrib/openssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/openssl b/contrib/openssl index 66deddc1e53..b7e86862e3a 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 
66deddc1e53cda8706604a019777259372d1bd62 +Subproject commit b7e86862e3af97c33bd99442519cd8553d5a1c83 From fdf8ff0e66cee78694606696d03774533b5c29fc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 17:25:14 +0000 Subject: [PATCH 58/79] Don't forget to call ucasemap_close for every ucasemap_open --- src/Functions/LowerUpperUTF8Impl.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 7d60bd54d2f..f174bcd5f3e 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -6,6 +6,7 @@ # include # include +# include # include # include # include @@ -49,6 +50,11 @@ struct LowerUpperUTF8Impl if (U_FAILURE(error_code)) throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Error calling ucasemap_open: {}", u_errorName(error_code)); + SCOPE_EXIT( + { + ucasemap_close(case_map); + }); + size_t curr_offset = 0; for (size_t row_i = 0; row_i < input_rows_count; ++row_i) { From 4bd66f41879b8bc4a3f57bad790cc87e0cf2b5fb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 17:31:20 +0000 Subject: [PATCH 59/79] Add test --- .../00170_lower_upper_utf8_memleak.reference | 1 + .../0_stateless/00170_lower_upper_utf8_memleak.sh | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/00170_lower_upper_utf8_memleak.reference create mode 100755 tests/queries/0_stateless/00170_lower_upper_utf8_memleak.sh diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8_memleak.reference b/tests/queries/0_stateless/00170_lower_upper_utf8_memleak.reference new file mode 100644 index 00000000000..7f6160bace3 --- /dev/null +++ b/tests/queries/0_stateless/00170_lower_upper_utf8_memleak.reference @@ -0,0 +1 @@ +españa diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8_memleak.sh b/tests/queries/0_stateless/00170_lower_upper_utf8_memleak.sh new file mode 100755 index 00000000000..8436659c6a8 --- /dev/null +++ 
b/tests/queries/0_stateless/00170_lower_upper_utf8_memleak.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# no-fasttest: upper/lowerUTF8 use ICU + +# Test for issue #69336 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query "SELECT lowerUTF8('ESPAÑA')" From 52024b7af3958ad25ffe9212e517854f18b4d582 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 17:51:03 +0000 Subject: [PATCH 60/79] Bump openssl to 3.2.3 --- contrib/openssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/openssl b/contrib/openssl index b7e86862e3a..b3e62c440f3 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit b7e86862e3af97c33bd99442519cd8553d5a1c83 +Subproject commit b3e62c440f390e12e77c80675f883af82ad3d5ed From 2bd36997f734c4e743d2ad814365d2d775ea27b8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 12:11:10 +0000 Subject: [PATCH 61/79] Bump grpc to v1.59.5 CVE-2024-7246 --- contrib/grpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/grpc b/contrib/grpc index 1716359d2e2..7bc3abe952a 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit 1716359d2e28d304a250f9df0e6c0ccad03de8db +Subproject commit 7bc3abe952aba1dc7bce7f2f790dc781cb51a41e From fb3de3451cbf61ac69e8c5cdd30b47d029911158 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 18:04:01 +0000 Subject: [PATCH 62/79] Make a clean start with v1.44.2 --- contrib/libuv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libuv b/contrib/libuv index 4482964660c..04cc9419e49 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 4482964660c77eec1166cd7d14fb915e3dbd774a +Subproject commit 04cc9419e4976c2ea33108f206b50f5922c2bbfe From cc2467542b1c8d4fce916b84008b6f52b82701cc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 
6 Sep 2024 18:26:50 +0000 Subject: [PATCH 63/79] Bump libuv to 1.45.0 --- contrib/libuv | 2 +- contrib/libuv-cmake/CMakeLists.txt | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/contrib/libuv b/contrib/libuv index 04cc9419e49..3dd84dd6aed 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 04cc9419e4976c2ea33108f206b50f5922c2bbfe +Subproject commit 3dd84dd6aed6d2f726d2720d12ec240ff240bc00 diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index 928fdcdd7e6..22df1e9a102 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -10,6 +10,7 @@ set(uv_sources src/random.c src/strscpy.c src/strtok.c + src/thread-common.c src/threadpool.c src/timer.c src/uv-common.c @@ -70,10 +71,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112) list(APPEND uv_libraries rt) list(APPEND uv_sources - src/unix/epoll.c - src/unix/linux-core.c - src/unix/linux-inotify.c - src/unix/linux-syscalls.c + src/unix/linux.c src/unix/procfs-exepath.c src/unix/random-getrandom.c src/unix/random-sysctl-linux.c) From 22f1d9752e896a9583f8ffd0a0051c3af1f87981 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 18:48:40 +0000 Subject: [PATCH 64/79] Bump libuv to 1.46.0 --- contrib/libuv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libuv b/contrib/libuv index 3dd84dd6aed..0e4fcb11681 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 3dd84dd6aed6d2f726d2720d12ec240ff240bc00 +Subproject commit 0e4fcb11681ce480a5db15868021bb4424717d82 From ac6eb1c52092c0dcb0bda66dfda6d7a6b7337be8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 18:50:59 +0000 Subject: [PATCH 65/79] Bump libuv to 1.47.0 --- contrib/libuv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libuv b/contrib/libuv index 0e4fcb11681..7a297688d08 160000 --- 
a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 0e4fcb11681ce480a5db15868021bb4424717d82 +Subproject commit 7a297688d08c487e1c5e7f63f6f5950396ed9b81 From f65c869d4c5c661decc70b027f2f4e864dc4b261 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 6 Sep 2024 18:52:43 +0000 Subject: [PATCH 66/79] Bump libuv to v1.48.0 --- contrib/libuv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libuv b/contrib/libuv index 7a297688d08..714b58b9849 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 7a297688d08c487e1c5e7f63f6f5950396ed9b81 +Subproject commit 714b58b9849568211ade86b44dd91d37f8a2175e From ba038cba4732197c31d791d20c7e973ee4e11750 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Tue, 27 Aug 2024 00:31:53 +0000 Subject: [PATCH 67/79] Fix 02378_part_log_profile_events flakiness --- .../02378_part_log_profile_events.sh | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100755 tests/queries/0_stateless/02378_part_log_profile_events.sh diff --git a/tests/queries/0_stateless/02378_part_log_profile_events.sh b/tests/queries/0_stateless/02378_part_log_profile_events.sh new file mode 100755 index 00000000000..8dd8b1eca91 --- /dev/null +++ b/tests/queries/0_stateless/02378_part_log_profile_events.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# Tags: no-shared-merge-tree +# Tag no-shared-merge-tree: depend on events with local disk + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query " + DROP TABLE IF EXISTS test; + + CREATE TABLE test (key UInt64, val UInt64) engine = MergeTree Order by key PARTITION BY key >= 128; + SET max_block_size = 64, max_insert_block_size = 64, min_insert_block_size_rows = 64; + INSERT INTO test SELECT number AS key, sipHash64(number) AS val FROM numbers(512); +" + +${CLICKHOUSE_CLIENT} --query " + SYSTEM FLUSH LOGS; + SELECT + if(count(DISTINCT query_id) == 1, 'Ok', 'Error: ' || toString(count(DISTINCT query_id))), + if(count() == 512 / 64, 'Ok', 'Error: ' || toString(count())), -- 512 rows inserted, 64 rows per block + if(SUM(ProfileEvents['MergeTreeDataWriterRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterRows']))), + if(SUM(ProfileEvents['MergeTreeDataWriterUncompressedBytes']) >= 1024, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterUncompressedBytes']))), + if(SUM(ProfileEvents['MergeTreeDataWriterCompressedBytes']) >= 1024, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterCompressedBytes']))), + if(SUM(ProfileEvents['MergeTreeDataWriterBlocks']) >= 8, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterBlocks']))) + FROM system.part_log + WHERE event_time > now() - INTERVAL 10 MINUTE + AND database == currentDatabase() AND table == 'test' + AND event_type == 'NewPart'; +" + +${CLICKHOUSE_CLIENT} --query "OPTIMIZE TABLE test FINAL;" + +${CLICKHOUSE_CLIENT} --query " + SYSTEM FLUSH LOGS; + SELECT + if(count() > 2, 'Ok', 'Error: ' || toString(count())), + if(SUM(ProfileEvents['MergedRows']) >= 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergedRows']))) + FROM system.part_log + WHERE event_time > now() - INTERVAL 10 MINUTE + AND database == currentDatabase() AND table == 'test' + AND event_type == 'MergeParts'; +" + +${CLICKHOUSE_CLIENT} --query " + ALTER TABLE test UPDATE val = 0 WHERE key % 2 == 0 SETTINGS mutations_sync = 2 +" + +# The mutation query may return 
before the entry is added to the system.part_log table. +# Retry SYSTEM FLUSH LOGS until all entries are fully flushed. +for _ in {1..10}; do + ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + res=$(${CLICKHOUSE_CLIENT} --query " + SELECT count() FROM system.part_log + WHERE event_time > now() - INTERVAL 10 MINUTE + AND database == currentDatabase() AND table == 'test' + AND event_type == 'MutatePart';" + ) + if [[ $res -eq 2 ]]; then + break + fi + + sleep 2.0 +done + +${CLICKHOUSE_CLIENT} --query " + SELECT + if(count() == 2, 'Ok', 'Error: ' || toString(count())), + if(SUM(ProfileEvents['MutatedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MutatedRows']))), + if(SUM(ProfileEvents['FileOpen']) > 1, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['FileOpen']))) + FROM system.part_log + WHERE event_time > now() - INTERVAL 10 MINUTE + AND database == currentDatabase() AND table == 'test' + AND event_type == 'MutatePart'; +" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test" From 99667c42f3af47c2d2ee8bb9652d2f5ab6fa73aa Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 6 Sep 2024 20:59:48 +0000 Subject: [PATCH 68/79] sync with the private --- tests/queries/0_stateless/02378_part_log_profile_events.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02378_part_log_profile_events.sql b/tests/queries/0_stateless/02378_part_log_profile_events.sql index eec76d6f50e..706731fb644 100644 --- a/tests/queries/0_stateless/02378_part_log_profile_events.sql +++ b/tests/queries/0_stateless/02378_part_log_profile_events.sql @@ -1,3 +1,5 @@ +-- Tags: no-shared-merge-tree +-- no-shared-merge-tree: depend on events with local disk DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt64, val UInt64) engine = MergeTree Order by key PARTITION BY key >= 128; From bf74b600e506d59df3c12c3f830fc2a04763b97c Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 6 Sep 2024 21:00:16 +0000 Subject: [PATCH 69/79] rm 
tests/queries/0_stateless/02378_part_log_profile_events.sql --- .../02378_part_log_profile_events.sql | 52 ------------------- 1 file changed, 52 deletions(-) delete mode 100644 tests/queries/0_stateless/02378_part_log_profile_events.sql diff --git a/tests/queries/0_stateless/02378_part_log_profile_events.sql b/tests/queries/0_stateless/02378_part_log_profile_events.sql deleted file mode 100644 index 706731fb644..00000000000 --- a/tests/queries/0_stateless/02378_part_log_profile_events.sql +++ /dev/null @@ -1,52 +0,0 @@ --- Tags: no-shared-merge-tree --- no-shared-merge-tree: depend on events with local disk -DROP TABLE IF EXISTS test; - -CREATE TABLE test (key UInt64, val UInt64) engine = MergeTree Order by key PARTITION BY key >= 128; - -SET max_block_size = 64, max_insert_block_size = 64, min_insert_block_size_rows = 64; - -INSERT INTO test SELECT number AS key, sipHash64(number) AS val FROM numbers(512); - -SYSTEM FLUSH LOGS; - -SELECT - if(count(DISTINCT query_id) == 1, 'Ok', 'Error: ' || toString(count(DISTINCT query_id))), - if(count() == 512 / 64, 'Ok', 'Error: ' || toString(count())), -- 512 rows inserted, 64 rows per block - if(SUM(ProfileEvents['MergeTreeDataWriterRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterRows']))), - if(SUM(ProfileEvents['MergeTreeDataWriterUncompressedBytes']) >= 1024, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterUncompressedBytes']))), - if(SUM(ProfileEvents['MergeTreeDataWriterCompressedBytes']) >= 1024, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterCompressedBytes']))), - if(SUM(ProfileEvents['MergeTreeDataWriterBlocks']) >= 8, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergeTreeDataWriterBlocks']))) -FROM system.part_log -WHERE event_time > now() - INTERVAL 10 MINUTE - AND database == currentDatabase() AND table == 'test' - AND event_type == 'NewPart' -; - -OPTIMIZE TABLE test FINAL; - -SYSTEM FLUSH LOGS; -SELECT - if(count() > 2, 'Ok', 'Error: ' 
|| toString(count())), - if(SUM(ProfileEvents['MergedRows']) >= 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergedRows']))) -FROM system.part_log -WHERE event_time > now() - INTERVAL 10 MINUTE - AND database == currentDatabase() AND table == 'test' - AND event_type == 'MergeParts' -; - -ALTER TABLE test UPDATE val = 0 WHERE key % 2 == 0 SETTINGS mutations_sync = 2; - -SYSTEM FLUSH LOGS; - -SELECT - if(count() == 2, 'Ok', 'Error: ' || toString(count())), - if(SUM(ProfileEvents['MutatedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MutatedRows']))), - if(SUM(ProfileEvents['FileOpen']) > 1, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['FileOpen']))) -FROM system.part_log -WHERE event_time > now() - INTERVAL 10 MINUTE - AND database == currentDatabase() AND table == 'test' - AND event_type == 'MutatePart' -; - -DROP TABLE test; From 7f637f7acfb83888e4c3de73c04de018da340c30 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 30 Aug 2024 21:46:56 +0000 Subject: [PATCH 70/79] kdc_network_reject: test passed --- tests/integration/helpers/network.py | 3 +- .../test_storage_kerberized_kafka/test.py | 48 ++++++++++++------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 065836396f3..f24b5924e73 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -303,6 +303,7 @@ class _NetworkManager: destination_port=None, action=None, probability=None, + protocol=None, custom_args=None, ): ret = [] @@ -317,7 +318,7 @@ class _NetworkManager: str(probability), ] ) - ret.extend(["-p", "tcp"]) + ret.extend(["-p", "tcp" if protocol is None else protocol]) if source is not None: ret.extend(["-s", source]) if destination is not None: diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 24d10d7ff83..c5e91831fc2 100644 --- 
a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -8,6 +8,7 @@ import logging from helpers.cluster import ClickHouseCluster, is_arm from helpers.test_tools import TSV from helpers.client import QueryRuntimeException +from helpers.network import PartitionManager import json import subprocess @@ -204,27 +205,40 @@ def test_kafka_json_as_string_no_kdc(kafka_cluster): ], ) - kafka_cluster.pause_container("kafka_kerberos") - time.sleep(45) # wait for ticket expiration + # temporary prevent CH - KDC communications + with PartitionManager() as pm: + for other_node in ["kafka_kerberos"]: + for node in kafka_cluster.instances.values(): + source = node.ip_address + destination = kafka_cluster.get_instance_ip(other_node) + logging.debug(f"partitioning source {source}, destination {destination}") + pm._add_rule( + { + "source": source, + "destination": destination, + "action": "REJECT", + "protocol": "all" + } + ) - instance.query( - """ - CREATE TABLE test.kafka_no_kdc (field String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kerberized_kafka1:19092', - kafka_topic_list = 'kafka_json_as_string_no_kdc', - kafka_group_name = 'kafka_json_as_string_no_kdc', - kafka_commit_on_select = 1, - kafka_format = 'JSONAsString', - kafka_flush_interval_ms=1000; - """ - ) + time.sleep(45) # wait for ticket expiration - result = instance.query("SELECT * FROM test.kafka_no_kdc;") + instance.query( + """ + CREATE TABLE test.kafka_no_kdc (field String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kerberized_kafka1:19092', + kafka_topic_list = 'kafka_json_as_string_no_kdc', + kafka_group_name = 'kafka_json_as_string_no_kdc', + kafka_commit_on_select = 1, + kafka_format = 'JSONAsString', + kafka_flush_interval_ms=1000; + """ + ) + + result = instance.query("SELECT * FROM test.kafka_no_kdc;") expected = "" - kafka_cluster.unpause_container("kafka_kerberos") - assert TSV(result) == TSV(expected) assert 
instance.contains_in_log("StorageKafka (kafka_no_kdc): Nothing to commit") assert instance.contains_in_log("Ticket expired") From c264f1c4681b3a84d1578ce6ad02213a868c01de Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Mon, 2 Sep 2024 07:45:27 +0000 Subject: [PATCH 71/79] kdc_network_reject: no redundant loop --- .../test_storage_kerberized_kafka/test.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index c5e91831fc2..dd5ee673b9e 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -207,19 +207,19 @@ def test_kafka_json_as_string_no_kdc(kafka_cluster): # temporary prevent CH - KDC communications with PartitionManager() as pm: - for other_node in ["kafka_kerberos"]: - for node in kafka_cluster.instances.values(): - source = node.ip_address - destination = kafka_cluster.get_instance_ip(other_node) - logging.debug(f"partitioning source {source}, destination {destination}") - pm._add_rule( - { - "source": source, - "destination": destination, - "action": "REJECT", - "protocol": "all" - } - ) + other_node = "kafka_kerberos" + for node in kafka_cluster.instances.values(): + source = node.ip_address + destination = kafka_cluster.get_instance_ip(other_node) + logging.debug(f"partitioning source {source}, destination {destination}") + pm._add_rule( + { + "source": source, + "destination": destination, + "action": "REJECT", + "protocol": "all" + } + ) time.sleep(45) # wait for ticket expiration From 4212ad02e3f1798d27588aa7648fb482e332f9ff Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 3 Sep 2024 13:32:02 +0000 Subject: [PATCH 72/79] kdc_network_reject: make black formatter happy --- tests/integration/test_storage_kerberized_kafka/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index dd5ee673b9e..3ac3415b6f3 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -217,7 +217,7 @@ def test_kafka_json_as_string_no_kdc(kafka_cluster): "source": source, "destination": destination, "action": "REJECT", - "protocol": "all" + "protocol": "all", } ) From a9fcd61b92a0a0e126036d4d31a88d9ae776662c Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 4 Sep 2024 15:48:59 +0000 Subject: [PATCH 73/79] kdc_network_reject: fix test_kafka_config_from_sql_named_collection --- tests/integration/test_storage_kerberized_kafka/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 3ac3415b6f3..cb8693c95ad 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -259,6 +259,7 @@ def test_kafka_config_from_sql_named_collection(kafka_cluster): instance.query( """ + DROP NAMED COLLECTION IF EXISTS kafka_config CREATE NAMED COLLECTION kafka_config AS kafka.security_protocol = 'SASL_PLAINTEXT', kafka.sasl_mechanism = 'GSSAPI', From f5c6612a0bf4276a8a664634eacb6449f4985401 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 4 Sep 2024 21:15:38 +0000 Subject: [PATCH 74/79] kdc_network_reject: bugfix --- .../test_storage_kerberized_kafka/test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index cb8693c95ad..4ffb5dab906 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -139,7 +139,7 @@ def 
test_kafka_json_as_string_request_new_ticket_after_expiration(kafka_cluster) kafka_produce( kafka_cluster, - "kafka_json_as_string", + "kafka_json_as_string_after_expiration", [ '{"t": 123, "e": {"x": "woof"} }', "", @@ -153,9 +153,9 @@ def test_kafka_json_as_string_request_new_ticket_after_expiration(kafka_cluster) CREATE TABLE test.kafka (field String) ENGINE = Kafka SETTINGS kafka_broker_list = 'kerberized_kafka1:19092', - kafka_topic_list = 'kafka_json_as_string', + kafka_topic_list = 'kafka_json_as_string_after_expiration', kafka_commit_on_select = 1, - kafka_group_name = 'kafka_json_as_string', + kafka_group_name = 'kafka_json_as_string_after_expiration', kafka_format = 'JSONAsString', kafka_flush_interval_ms=1000; """ @@ -248,7 +248,7 @@ def test_kafka_json_as_string_no_kdc(kafka_cluster): def test_kafka_config_from_sql_named_collection(kafka_cluster): kafka_produce( kafka_cluster, - "kafka_json_as_string", + "kafka_json_as_string_named_collection", [ '{"t": 123, "e": {"x": "woof"} }', "", @@ -259,7 +259,7 @@ def test_kafka_config_from_sql_named_collection(kafka_cluster): instance.query( """ - DROP NAMED COLLECTION IF EXISTS kafka_config + DROP NAMED COLLECTION IF EXISTS kafka_config; CREATE NAMED COLLECTION kafka_config AS kafka.security_protocol = 'SASL_PLAINTEXT', kafka.sasl_mechanism = 'GSSAPI', @@ -270,9 +270,9 @@ def test_kafka_config_from_sql_named_collection(kafka_cluster): kafka.api_version_request = 'false', kafka_broker_list = 'kerberized_kafka1:19092', - kafka_topic_list = 'kafka_json_as_string', + kafka_topic_list = 'kafka_json_as_string_named_collection', kafka_commit_on_select = 1, - kafka_group_name = 'kafka_json_as_string', + kafka_group_name = 'kafka_json_as_string_named_collection', kafka_format = 'JSONAsString', kafka_flush_interval_ms=1000; """ From f16d4c9567ccf6fa5f206614925e860da4587071 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Thu, 5 Sep 2024 07:31:17 +0000 Subject: [PATCH 75/79] kdc_network_reject: bugfix (again) --- 
tests/integration/test_storage_kerberized_kafka/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 4ffb5dab906..a00914543c6 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -171,7 +171,7 @@ def test_kafka_json_as_string_request_new_ticket_after_expiration(kafka_cluster) """ assert TSV(result) == TSV(expected) assert instance.contains_in_log( - "Parsing of message (topic: kafka_json_as_string, partition: 0, offset: 1) return no rows" + "Parsing of message (topic: kafka_json_as_string_after_expiration, partition: 0, offset: 1) return no rows" ) @@ -294,7 +294,7 @@ def test_kafka_config_from_sql_named_collection(kafka_cluster): """ assert TSV(result) == TSV(expected) assert instance.contains_in_log( - "Parsing of message (topic: kafka_json_as_string, partition: 0, offset: 1) return no rows" + "Parsing of message (topic: kafka_json_as_string_named_collection, partition: 0, offset: 1) return no rows" ) From 9667c19b6ea81bf5227202824879c3d9b34a33c9 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Sat, 7 Sep 2024 09:16:38 +0200 Subject: [PATCH 76/79] Revert "Speed up some Kafka tests with multiprocessing" --- tests/integration/test_storage_kafka/test.py | 289 +++++++------------ 1 file changed, 108 insertions(+), 181 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 440b7c526b9..bef90e1b9d3 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -10,8 +10,6 @@ import string import ast import math -from multiprocessing.dummy import Pool - import avro.schema import avro.io import avro.datafile @@ -1000,11 +998,7 @@ def test_kafka_formats(kafka_cluster, create_query_generator): } topic_postfix = 
str(hash(create_query_generator)) - - p = Pool(10) - results = [] - - def run_for_format(format_name, format_opts): + for format_name, format_opts in list(all_formats.items()): logging.debug(f"Set up {format_name}") topic_name = f"format_tests_{format_name}-{topic_postfix}" data_sample = format_opts["data_sample"] @@ -1040,13 +1034,6 @@ def test_kafka_formats(kafka_cluster, create_query_generator): ), ) ) - - for format_name, format_opts in list(all_formats.items()): - results.append(p.apply_async(run_for_format, args=(format_name, format_opts))) - - for result in results: - result.get() - raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -1077,9 +1064,7 @@ def test_kafka_formats(kafka_cluster, create_query_generator): ) assert result_checker(res) - results = [] - - def run_for_format2(format_name, format_opts): + for format_name, format_opts in list(all_formats.items()): logging.debug(("Checking {}".format(format_name))) topic_name = f"format_tests_{format_name}-{topic_postfix}" # shift offsets by 1 if format supports empty value @@ -1103,12 +1088,6 @@ def test_kafka_formats(kafka_cluster, create_query_generator): ) kafka_delete_topic(get_admin_client(kafka_cluster), topic_name) - for format_name, format_opts in list(all_formats.items()): - results.append(p.apply_async(run_for_format2, args=(format_name, format_opts))) - - for result in results: - result.get() - # Since everything is async and shaky when receiving messages from Kafka, # we may want to try and check results multiple times in a loop. 
@@ -4258,11 +4237,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator topic_name_prefix = "format_tests_4_stream_" topic_name_postfix = get_topic_postfix(create_query_generator) - - p = Pool(10) - results = [] - - def run_for_format(format_name, format_opts): + for format_name, format_opts in list(all_formats.items()): logging.debug(f"Set up {format_name}") topic_name = f"{topic_name_prefix}{format_name}{topic_name_postfix}" data_sample = format_opts["data_sample"] @@ -4303,12 +4278,6 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator """ ) - for format_name, format_opts in list(all_formats.items()): - results.append(p.apply_async(run_for_format, args=(format_name, format_opts))) - - for result in results: - result.get() - raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -4339,9 +4308,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator ) assert result_checker(res) - results = [] - - def run_for_format2(format_name, format_opts): + for format_name, format_opts in list(all_formats.items()): logging.debug(f"Checking {format_name}") topic_name = f"{topic_name_prefix}{format_name}{topic_name_postfix}" # shift offsets by 1 if format supports empty value @@ -4381,12 +4348,6 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator ), "Proper error for format: {}".format(format_name) kafka_delete_topic(admin_client, topic_name) - for format_name, format_opts in list(all_formats.items()): - results.append(p.apply_async(run_for_format2, args=(format_name, format_opts))) - - for result in results: - result.get() - @pytest.mark.parametrize( "create_query_generator", @@ -4870,63 +4831,6 @@ def test_max_rows_per_message(kafka_cluster, create_query_generator): def test_row_based_formats(kafka_cluster, create_query_generator): admin_client = get_admin_client(kafka_cluster) - p = Pool(10) - - def 
run_for_format_name(format_name): - logging.debug("Checking {format_name}") - - topic_name = format_name + get_topic_postfix(create_query_generator) - view_name = f"kafka_view_{format_name}" - table_name = f"kafka_{format_name}" - - with kafka_topic(admin_client, topic_name): - num_rows = 10 - max_rows_per_message = 5 - message_count = num_rows / max_rows_per_message - - create_query = create_query_generator( - table_name, - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format=format_name, - settings={"kafka_max_rows_per_message": max_rows_per_message}, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.{view_name}; - DROP TABLE IF EXISTS test.{table_name}; - - {create_query}; - - CREATE MATERIALIZED VIEW test.{view_name} ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; - - INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); - """ - ) - - messages = kafka_consume_with_retry( - kafka_cluster, topic_name, message_count, need_decode=False - ) - - assert ( - len(messages) == message_count - ), f"Invalid message count for {format_name}" - - instance.query_with_retry( - f"SELECT count() FROM test.{view_name}", - check_callback=lambda res: int(res) == num_rows, - ) - - result = instance.query(f"SELECT * FROM test.{view_name}") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected, f"Invalid result for {format_name}" - - results = [] for format_name in [ "TSV", "TSVWithNamesAndTypes", @@ -4945,10 +4849,55 @@ def test_row_based_formats(kafka_cluster, create_query_generator): "RowBinaryWithNamesAndTypes", "MsgPack", ]: - results.append(p.apply_async(run_for_format_name, args=(format_name,))) + logging.debug("Checking {format_name}") - for result in results: - result.get() + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = 
f"kafka_{format_name}" + + with kafka_topic(admin_client, topic_name): + num_rows = 10 + max_rows_per_message = 5 + message_count = num_rows / max_rows_per_message + + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + settings={"kafka_max_rows_per_message": max_rows_per_message}, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; + + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + + INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); + """ + ) + + messages = kafka_consume_with_retry( + kafka_cluster, topic_name, message_count, need_decode=False + ) + + assert len(messages) == message_count + + instance.query_with_retry( + "SELECT count() FROM test.view", + check_callback=lambda res: int(res) == num_rows, + ) + + result = instance.query("SELECT * FROM test.view") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected @pytest.mark.parametrize( @@ -5006,12 +4955,16 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): num_rows = 100 message_count = 9 - p = Pool(10) - - def run_for_format_name(format_name): + for format_name in [ + "JSONColumns", + "Native", + "Arrow", + "Parquet", + "ORC", + "JSONCompactColumns", + ]: topic_name = format_name + get_topic_postfix(create_query_generator) table_name = f"kafka_{format_name}" - view_name = f"kafka_view_{format_name}" logging.debug(f"Checking format {format_name}") with kafka_topic(admin_client, topic_name): create_query = create_query_generator( @@ -5024,12 +4977,12 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): instance.query( f""" - DROP TABLE IF EXISTS test.{view_name}; + DROP TABLE IF EXISTS 
test.view; DROP TABLE IF EXISTS test.{table_name}; {create_query}; - CREATE MATERIALIZED VIEW test.{view_name} ENGINE=MergeTree ORDER BY (key, value) AS + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS SELECT key, value FROM test.{table_name}; INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; @@ -5038,38 +4991,22 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): messages = kafka_consume_with_retry( kafka_cluster, topic_name, message_count, need_decode=False ) - assert ( - len(messages) == message_count - ), f"Invalid message count for {format_name}" + assert len(messages) == message_count rows = int( instance.query_with_retry( - f"SELECT count() FROM test.{view_name}", + "SELECT count() FROM test.view", check_callback=lambda res: int(res) == num_rows, ) ) - assert rows == num_rows, f"Invalid row count for {format_name}" + assert rows == num_rows - result = instance.query(f"SELECT * FROM test.{view_name} ORDER by key") + result = instance.query("SELECT * FROM test.view ORDER by key") expected = "" for i in range(num_rows): expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected, f"Invalid result for {format_name}" - - results = [] - for format_name in [ - "JSONColumns", - "Native", - "Arrow", - "Parquet", - "ORC", - "JSONCompactColumns", - ]: - results.append(p.apply_async(run_for_format_name, args=(format_name,))) - - for result in results: - result.get() + assert result == expected def test_system_kafka_consumers(kafka_cluster): @@ -5363,54 +5300,6 @@ def test_formats_errors(kafka_cluster): bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) - p = Pool(10) - - def run_for_format_name(format_name): - with kafka_topic(admin_client, format_name): - table_name = f"kafka_{format_name}" - view_name = f"kafka_view_{format_name}" - - instance.query( - f""" - DROP 
TABLE IF EXISTS test.{view_name}; - DROP TABLE IF EXISTS test.{table_name}; - - CREATE TABLE test.{table_name} (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}', - kafka_max_rows_per_message = 5, - format_template_row='template_row.format', - format_regexp='id: (.+?)', - input_format_with_names_use_header=0, - format_schema='key_value_message:Message'; - - CREATE MATERIALIZED VIEW test.{view_name} ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; - """ - ) - - kafka_produce( - kafka_cluster, - format_name, - ["Broken message\nBroken message\nBroken message\n"], - ) - - num_errors = int( - instance.query_with_retry( - f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", - check_callback=lambda res: int(res) > 0, - ) - ) - - assert num_errors > 0, f"No errors for {format_name}" - - instance.query(f"DROP TABLE test.{table_name}") - instance.query(f"DROP TABLE test.{view_name}") - - results = [] for format_name in [ "Template", "Regexp", @@ -5453,10 +5342,48 @@ def test_formats_errors(kafka_cluster): "HiveText", "MySQLDump", ]: - results.append(p.apply_async(run_for_format_name, args=(format_name,))) + with kafka_topic(admin_client, format_name): + table_name = f"kafka_{format_name}" - for result in results: - result.get() + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; + + CREATE TABLE test.{table_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_max_rows_per_message = 5, + format_template_row='template_row.format', + format_regexp='id: (.+?)', + input_format_with_names_use_header=0, + 
format_schema='key_value_message:Message'; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + """ + ) + + kafka_produce( + kafka_cluster, + format_name, + ["Broken message\nBroken message\nBroken message\n"], + ) + + num_errors = int( + instance.query_with_retry( + f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", + check_callback=lambda res: int(res) > 0, + ) + ) + + assert num_errors > 0 + + instance.query(f"DROP TABLE test.{table_name}") + instance.query("DROP TABLE test.view") @pytest.mark.parametrize( From c335f4b1e409390b90579c6d9dbe80b05f594538 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Sat, 7 Sep 2024 07:39:31 -0700 Subject: [PATCH 77/79] Update README.md - Meetups Adjusting meetup listings per latest --- README.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 652f5e0751d..5fa04fe29e7 100644 --- a/README.md +++ b/README.md @@ -42,21 +42,19 @@ Keep an eye out for upcoming meetups and events around the world. 
Somewhere else The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov: -* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 -* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 * [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9 * [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 Other upcoming meetups -* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27 -* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27 -* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 -* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 + * [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10 * [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17 * [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17 +* [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18 * [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22 +* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1 +* [Singapore Meetup](https://www.meetup.com/clickhouse-singapore-meetup-group/events/303212064/) - October 3 * [Madrid Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096564/) - October 22 * [Barcelona 
Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - October 29 * [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31 @@ -64,7 +62,13 @@ Other upcoming meetups * [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21 * [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26 - +Recently completed events +* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 +* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27 +* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27 +* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 +* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 +* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. 
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" From 7ff244d7a3aec0c39bb56ea5d7bb3de57d7ee26e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 7 Sep 2024 20:46:56 +0200 Subject: [PATCH 78/79] Update docs/en/sql-reference/functions/string-replace-functions.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/functions/string-replace-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 283d41d8b73..4417656fc80 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -22,7 +22,7 @@ overlay(s, replace, offset[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the string `s`. +- `offset`: An integer type [Int](../data-types/int-uint.md) (1-based). If `offset` is negative, it is counted from the end of the string `s`. - `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within the input string `s` to be replaced. If `length` is not specified, the number of bytes removed from `s` equals the length of `replace`; otherwise `length` bytes are removed. 
**Returned value** From 654f8b98de5337474798b60c2708b31cbc0cb0b8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 7 Sep 2024 20:47:03 +0200 Subject: [PATCH 79/79] Update docs/en/sql-reference/functions/string-replace-functions.md Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- docs/en/sql-reference/functions/string-replace-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 4417656fc80..3f50cd24f93 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -72,7 +72,7 @@ overlayUTF8(s, replace, offset[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the input string `s`. +- `offset`: An integer type [Int](../data-types/int-uint.md) (1-based). If `offset` is negative, it is counted from the end of the input string `s`. - `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within the input string `s` to be replaced. If `length` is not specified, the number of characters removed from `s` equals the length of `replace`; otherwise `length` characters are removed. **Returned value**